akhellad committed
Commit 26a3529 · 1 Parent(s): 9e5c05e

Initial commit

Files changed (4)
  1. README.md +52 -5
  2. app.py +375 -0
  3. requirements.txt +7 -0
  4. tracker.py +379 -0
README.md CHANGED
@@ -1,12 +1,59 @@
  ---
- title: SurgiTrackDemo
- emoji: 📊
- colorFrom: indigo
+ title: SurgiTrack - Surgical Tool Tracking
+ emoji: 🔬
+ colorFrom: purple
  colorTo: indigo
  sdk: gradio
- sdk_version: 6.1.0
+ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
+ license: mit
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # SurgiTrack - Surgical Tool Tracking
+
+ Multi-class multi-tool tracking system for laparoscopic surgery videos.
+
+ ## Overview
+
+ This demo implements the tracking pipeline from ["SurgiTrack: Fine-Grained Multi-Class Multi-Tool Tracking in Surgical Videos"](https://arxiv.org/abs/2312.07352), trained and evaluated on the CholecTrack20 dataset.
+
+ ## Pipeline
+
+ 1. **Detection**: YOLOv11x trained on 7 surgical tool classes
+ 2. **Direction Estimation**: EfficientNet-B0 + Coordinate Attention predicts the operator (MSLH, MSRH, ASRH, or NULL)
+ 3. **Tracking**: Operator-based slot assignment for graspers, fixed IDs for other tools
+
+ ## Results
+
+ | Metric | Score |
+ |--------|-------|
+ | HOTA | 64.48% |
+ | AssA | 71.19% |
+ | DetA | 58.51% |
+
+ ## Tool Classes
+
+ - Grasper (tracked by operator)
+ - Bipolar
+ - Hook
+ - Scissors
+ - Clipper
+ - Irrigator
+ - Specimen Bag
+
+ ## Citation
+
+ ```bibtex
+ @InProceedings{nwoye2023cholectrack20,
+     author    = {Nwoye, Chinedu Innocent and Elgohary, Kareem and Srinivas, Anvita and Zaid, Fauzan and Lavanchy, Joël L. and Padoy, Nicolas},
+     title     = {CholecTrack20: A Multi-Perspective Tracking Dataset for Surgical Tools},
+     booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+     year      = {2025},
+     month     = {June}
+ }
+ ```
+
+ ## Author
+
+ [Djalil Khelladi](https://github.com/akhellad)
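
The three Pipeline stages above map directly onto the classes added in this commit. As a rough per-frame sketch (illustrative only, not part of the committed files; `frame.png` is a placeholder input, and passing `direction_model=None` makes the tracker fall back to the NULL operator):

```python
# Hedged sketch of the README pipeline using the interfaces from app.py / tracker.py below.
import cv2
from ultralytics import YOLO
from tracker import Detection, OperatorBasedTracker

CLASS_NAMES = ['grasper', 'bipolar', 'hook', 'scissors', 'clipper', 'irrigator', 'specimenbag']

detector = YOLO("weights/best.pt")                     # step 1: detection (7 tool classes)
tracker = OperatorBasedTracker(direction_model=None,   # step 2 skipped -> NULL operator fallback
                               device="cpu")           # step 3: operator-based slot tracking

frame = cv2.imread("frame.png")                        # placeholder frame, not a committed asset
boxes = detector.predict(frame, conf=0.25, verbose=False)[0].boxes
detections = [
    Detection(bbox=boxes.xyxy[i].cpu().numpy(),
              class_id=int(boxes.cls[i]),
              class_name=CLASS_NAMES[int(boxes.cls[i])],
              confidence=float(boxes.conf[i]),
              frame_id=0)
    for i in range(len(boxes))
]
for slot in tracker.update(frame, detections):
    print(slot.track_id, slot.class_name, slot.operator_id)  # stable ID per tool / operator slot
```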
app.py ADDED
@@ -0,0 +1,375 @@
+ """
+ SurgiTrack Demo - Surgical Tool Tracking
+ Based on CholecTrack20 dataset (Nwoye et al., CVPR 2025)
+ """
+
+ import os
+ import gradio as gr
+ import cv2
+ import numpy as np
+ import torch
+ from pathlib import Path
+ from collections import deque
+
+ # Import models (will be loaded on startup)
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ YOLO_MODEL = None
+ DIRECTION_MODEL = None
+ TRACKER = None
+
+ CLASS_NAMES = ['grasper', 'bipolar', 'hook', 'scissors', 'clipper', 'irrigator', 'specimenbag']
+
+ COLORS = {
+     'grasper': (255, 100, 100),
+     'bipolar': (100, 255, 100),
+     'hook': (100, 100, 255),
+     'scissors': (255, 255, 100),
+     'clipper': (255, 100, 255),
+     'irrigator': (100, 255, 255),
+     'specimenbag': (200, 200, 200),
+ }
+
+ OPERATOR_COLORS = {
+     0: (0, 255, 0),      # MSLH - Green
+     1: (0, 0, 255),      # MSRH - Red
+     2: (255, 165, 0),    # ASRH - Orange
+     3: (128, 128, 128)   # NULL - Gray
+ }
+
+
+ def load_models():
+     """Load YOLO and Direction Estimator models"""
+     global YOLO_MODEL, DIRECTION_MODEL, TRACKER
+
+     from ultralytics import YOLO
+     from tracker import DirectionEstimator, OperatorBasedTracker
+
+     # Load YOLO
+     yolo_path = "weights/best.pt"
+     if os.path.exists(yolo_path):
+         YOLO_MODEL = YOLO(yolo_path)
+         print(f"YOLO model loaded from {yolo_path}")
+     else:
+         print(f"Warning: YOLO model not found at {yolo_path}")
+         return False
+
+     # Load Direction Estimator
+     direction_path = "weights/direction_estimator.pth"
+     if os.path.exists(direction_path):
+         DIRECTION_MODEL = DirectionEstimator(num_classes=4, pretrained=False)
+         checkpoint = torch.load(direction_path, map_location=DEVICE, weights_only=False)
+         DIRECTION_MODEL.load_state_dict(checkpoint['model_state_dict'])
+         DIRECTION_MODEL.to(DEVICE)
+         DIRECTION_MODEL.eval()
+         print(f"Direction model loaded from {direction_path}")
+     else:
+         print(f"Warning: Direction model not found at {direction_path}")
+         DIRECTION_MODEL = None
+
+     # Initialize tracker
+     TRACKER = OperatorBasedTracker(
+         direction_model=DIRECTION_MODEL,
+         max_inactive_frames=150,
+         iou_threshold=0.2,
+         direction_confidence_threshold=0.4,
+         device=DEVICE
+     )
+
+     return True
+
+
+ def draw_tracking_results(frame, slots, trajectories, frame_count):
+     """Draw bounding boxes, IDs, and trajectories on frame"""
+     for slot in slots:
+         if slot.bbox is None:
+             continue
+
+         x1, y1, x2, y2 = slot.bbox.astype(int)
+         track_id = slot.track_id
+         class_name = slot.class_name
+
+         # Update trajectory
+         center = (int((x1 + x2) / 2), int((y1 + y2) / 2))
+         if track_id not in trajectories:
+             trajectories[track_id] = deque(maxlen=30)
+         trajectories[track_id].append(center)
+
+         # Get colors
+         bbox_color = COLORS.get(class_name, (255, 255, 255))
+         op_color = OPERATOR_COLORS.get(slot.operator_id, (128, 128, 128))
+
+         # Draw bbox
+         cv2.rectangle(frame, (x1, y1), (x2, y2), bbox_color, 2)
+
+         # Draw operator indicator
+         cv2.circle(frame, (x2 - 10, y1 + 10), 8, op_color, -1)
+         cv2.circle(frame, (x2 - 10, y1 + 10), 8, (0, 0, 0), 1)
+
+         # Draw label
+         label = f"ID:{track_id} {class_name}"
+         (lw, lh), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+         cv2.rectangle(frame, (x1, y1 - lh - 8), (x1 + lw + 4, y1), bbox_color, -1)
+         cv2.putText(frame, label, (x1 + 2, y1 - 4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
+
+         # Draw trajectory
+         traj = list(trajectories[track_id])
+         for i in range(1, len(traj)):
+             alpha = i / len(traj)
+             thickness = max(1, int(alpha * 3))
+             color = tuple(int(c * alpha) for c in bbox_color)
+             cv2.line(frame, traj[i-1], traj[i], color, thickness)
+
+     # Draw frame counter
+     cv2.putText(frame, f"Frame: {frame_count}", (10, 30),
+                 cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
+
+     return frame, trajectories
+
+
+ def process_video_live(video_path, confidence_threshold, progress=gr.Progress()):
+     """Process video with live inference"""
+     global YOLO_MODEL, TRACKER
+
+     if YOLO_MODEL is None:
+         return None, "Error: Models not loaded"
+
+     from tracker import Detection
+
+     # Reset tracker
+     TRACKER.reset()
+
+     cap = cv2.VideoCapture(video_path)
+     fps = cap.get(cv2.CAP_PROP_FPS)
+     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+     # Output video
+     output_path = "/tmp/output_tracked.mp4"
+     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+     writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+
+     trajectories = {}
+     frame_count = 0
+     total_detections = 0
+     unique_tracks = set()
+
+     while True:
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         # YOLO detection
+         results = YOLO_MODEL.predict(frame, conf=confidence_threshold, verbose=False)
+
+         detections = []
+         if len(results) > 0 and results[0].boxes is not None:
+             boxes = results[0].boxes
+             for i in range(len(boxes)):
+                 class_id = int(boxes.cls[i])
+                 detections.append(Detection(
+                     bbox=boxes.xyxy[i].cpu().numpy(),
+                     class_id=class_id,
+                     class_name=CLASS_NAMES[class_id] if class_id < len(CLASS_NAMES) else "unknown",
+                     confidence=float(boxes.conf[i]),
+                     frame_id=frame_count
+                 ))
+
+         total_detections += len(detections)
+
+         # Update tracker
+         slots = TRACKER.update(frame, detections)
+
+         for slot in slots:
+             unique_tracks.add(slot.track_id)
+
+         # Draw results
+         frame, trajectories = draw_tracking_results(frame, slots, trajectories, frame_count)
+
+         writer.write(frame)
+         frame_count += 1
+
+         progress(frame_count / total_frames, desc=f"Processing frame {frame_count}/{total_frames}")
+
+     cap.release()
+     writer.release()
+
+     # Stats
+     stats = f"""
+ **Processing Complete**
+ - Total frames: {frame_count}
+ - Total detections: {total_detections}
+ - Unique tracks: {len(unique_tracks)}
+ - Average detections/frame: {total_detections/frame_count:.2f}
+ - Device: {DEVICE}
+ """
+
+     return output_path, stats
+
+
+ def show_precomputed_demo(demo_name):
+     """Show a precomputed demo video"""
+     demo_videos = {
+         "Demo 1 - Multi-tool tracking": "demos/demo1_tracked.mp4",
+         "Demo 2 - Occlusion handling": "demos/demo2_tracked.mp4",
+         "Demo 3 - Tool re-identification": "demos/demo3_tracked.mp4",
+     }
+
+     video_path = demo_videos.get(demo_name)
+
+     if video_path and os.path.exists(video_path):
+         # Get stats from companion json if exists
+         stats = f"""
+ **{demo_name}**
+
+ Pre-computed tracking results using:
+ - YOLOv11x for detection
+ - Direction Estimator for operator prediction
+ - Operator-based tracker for multi-tool tracking
+
+ *Results computed on GPU, displayed instantly.*
+ """
+         return video_path, stats
+     else:
+         return None, f"Demo video not found: {video_path}"
+
+
+ def get_available_demos():
+     """Get list of available demo videos"""
+     demos_dir = Path("demos")
+     if demos_dir.exists():
+         return [f.stem.replace("_tracked", "") for f in demos_dir.glob("*_tracked.mp4")]
+     return ["Demo 1 - Multi-tool tracking", "Demo 2 - Occlusion handling", "Demo 3 - Tool re-identification"]
+
+
+ # Build Gradio interface
+ def create_interface():
+     with gr.Blocks(
+         title="SurgiTrack - Surgical Tool Tracking",
+         theme=gr.themes.Base(
+             primary_hue="purple",
+             secondary_hue="gray",
+             neutral_hue="gray",
+         ).set(
+             body_background_fill="#0a0a0f",
+             body_background_fill_dark="#0a0a0f",
+             block_background_fill="#12121a",
+             block_background_fill_dark="#12121a",
+             block_border_color="#2a2a3a",
+             block_border_color_dark="#2a2a3a",
+             button_primary_background_fill="#a855f7",
+             button_primary_background_fill_hover="#9333ea",
+         ),
+         css="""
+         .gradio-container { max-width: 1200px !important; }
+         .gr-button { font-weight: 500; }
+         footer { display: none !important; }
+         """
+     ) as demo:
+
+         gr.Markdown("""
+         # 🔬 SurgiTrack - Surgical Tool Tracking
+
+         Multi-class multi-tool tracking in laparoscopic surgery videos.
+         Based on the [SurgiTrack paper](https://arxiv.org/abs/2312.07352) and trained on CholecTrack20 dataset.
+
+         **Pipeline:** YOLOv11x Detection → Direction Estimation → Operator-based Tracking
+
+         ---
+         """)
+
+         with gr.Tabs():
+             # Tab 1: Pre-computed demos (instant)
+             with gr.TabItem("📽️ Demo Videos (Instant)"):
+                 gr.Markdown("""
+                 ### Pre-computed Results
+                 Watch tracking results instantly. These videos were processed on GPU with full pipeline.
+                 """)
+
+                 with gr.Row():
+                     demo_dropdown = gr.Dropdown(
+                         choices=get_available_demos(),
+                         label="Select Demo",
+                         value=get_available_demos()[0] if get_available_demos() else None
+                     )
+                     demo_btn = gr.Button("▶️ Show Demo", variant="primary")
+
+                 with gr.Row():
+                     demo_video = gr.Video(label="Tracking Result")
+                     demo_stats = gr.Markdown(label="Statistics")
+
+                 demo_btn.click(
+                     fn=show_precomputed_demo,
+                     inputs=[demo_dropdown],
+                     outputs=[demo_video, demo_stats]
+                 )
+
+             # Tab 2: Live inference (slower but real)
+             with gr.TabItem("🔄 Live Inference (CPU)"):
+                 gr.Markdown("""
+                 ### Real-time Processing
+                 Upload a short video clip (5-15 seconds recommended) for live tracking.
+
+                 ⚠️ **Note:** Running on CPU - processing may take a few minutes.
+                 """)
+
+                 with gr.Row():
+                     with gr.Column():
+                         input_video = gr.Video(label="Upload Video")
+                         confidence_slider = gr.Slider(
+                             minimum=0.1, maximum=0.9, value=0.25, step=0.05,
+                             label="Detection Confidence Threshold"
+                         )
+                         process_btn = gr.Button("🚀 Run Tracking", variant="primary")
+
+                     with gr.Column():
+                         output_video = gr.Video(label="Tracked Video")
+                         output_stats = gr.Markdown(label="Statistics")
+
+                 process_btn.click(
+                     fn=process_video_live,
+                     inputs=[input_video, confidence_slider],
+                     outputs=[output_video, output_stats]
+                 )
+
+         gr.Markdown("""
+         ---
+
+         ### 📊 Method Overview
+
+         | Component | Description |
+         |-----------|-------------|
+         | **Detection** | YOLOv11x trained on CholecTrack20 (7 tool classes) |
+         | **Direction Estimator** | EfficientNet-B0 + Coordinate Attention → Operator prediction |
+         | **Tracker** | Operator-based slots for graspers, fixed IDs for other tools |
+
+         ### 📈 Results on CholecTrack20 Test Set
+
+         | Metric | Score |
+         |--------|-------|
+         | **HOTA** | 64.48% |
+         | **AssA** | 71.19% |
+         | **DetA** | 58.51% |
+
+         ---
+
+         **Dataset:** [CholecTrack20](https://arxiv.org/abs/2312.07352) (Nwoye et al., CVPR 2025)
+
+         **Author:** [Djalil Khelladi](https://github.com/akhellad)
+         """)
+
+     return demo
+
+
+ if __name__ == "__main__":
+     print(f"Starting SurgiTrack Demo on {DEVICE}...")
+
+     # Try to load models
+     models_loaded = load_models()
+
+     if not models_loaded:
+         print("Warning: Models not loaded. Only pre-computed demos will work.")
+
+     # Create and launch interface
+     demo = create_interface()
+     demo.launch()
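
When the Space is run outside Hugging Face (for example locally), the same entry point applies. A hedged launch variant, assuming the `app` module above is importable and using standard Gradio Blocks options; the committed app.py itself simply calls `demo.launch()`:

```python
# Hypothetical local launch script; not part of this commit.
from app import load_models, create_interface

load_models()        # falls back to demo-only mode if weights/ is missing
demo = create_interface()
demo.queue().launch(server_name="0.0.0.0", server_port=7860)  # queue the slow CPU inference path
```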
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio>=4.0.0
+ torch>=2.0.0
+ torchvision>=0.15.0
+ ultralytics>=8.0.0
+ opencv-python-headless>=4.8.0
+ numpy>=1.24.0
+ scipy>=1.10.0
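
A quick, purely illustrative sanity check that the dependency set above resolves and that the device selection used in app.py behaves as expected (not a committed file):

```python
# Hypothetical smoke test for the requirements above.
import cv2, gradio, numpy, scipy, torch, torchvision, ultralytics

print("gradio", gradio.__version__)
print("torch", torch.__version__, "| cuda available:", torch.cuda.is_available())
print("ultralytics", ultralytics.__version__, "| opencv", cv2.__version__)
```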
tracker.py ADDED
@@ -0,0 +1,379 @@
+ """
+ SurgiTrack - Tracker Module (Simplified for HF Space)
+ """
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from torchvision import models
+ import numpy as np
+ from scipy.optimize import linear_sum_assignment
+ from dataclasses import dataclass, field
+ from typing import List, Dict, Optional
+ import cv2
+
+
+ CLASS_NAMES = ['grasper', 'bipolar', 'hook', 'scissors', 'clipper', 'irrigator', 'specimenbag']
+ OPERATORS = ['MSLH', 'MSRH', 'ASRH', 'NULL']
+
+
+ class CoordinateAttention(nn.Module):
+     def __init__(self, in_channels, reduction=32):
+         super().__init__()
+         self.pool_h = nn.AdaptiveAvgPool2d((None, 1))
+         self.pool_w = nn.AdaptiveAvgPool2d((1, None))
+
+         mid_channels = max(8, in_channels // reduction)
+         self.conv1 = nn.Conv2d(in_channels, mid_channels, kernel_size=1)
+         self.bn1 = nn.BatchNorm2d(mid_channels)
+         self.act = nn.ReLU(inplace=True)
+
+         self.conv_h = nn.Conv2d(mid_channels, in_channels, kernel_size=1)
+         self.conv_w = nn.Conv2d(mid_channels, in_channels, kernel_size=1)
+
+     def forward(self, x):
+         B, C, H, W = x.shape
+
+         x_h = self.pool_h(x)
+         x_w = self.pool_w(x).permute(0, 1, 3, 2)
+
+         y = torch.cat([x_h, x_w], dim=2)
+         y = self.act(self.bn1(self.conv1(y)))
+
+         x_h, x_w = torch.split(y, [H, W], dim=2)
+         x_w = x_w.permute(0, 1, 3, 2)
+
+         a_h = self.conv_h(x_h).sigmoid()
+         a_w = self.conv_w(x_w).sigmoid()
+
+         return x * a_h * a_w
+
+
+ class DirectionEstimator(nn.Module):
+     def __init__(self, num_classes=4, embedding_dim=128, pretrained=True):
+         super().__init__()
+
+         self.backbone = models.efficientnet_b0(
+             weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
+         )
+         backbone_out = self.backbone.classifier[1].in_features
+         self.backbone.classifier = nn.Identity()
+
+         self.coord_attention = CoordinateAttention(backbone_out)
+
+         self.embedding_head = nn.Sequential(
+             nn.Linear(backbone_out, 512),
+             nn.ReLU(inplace=True),
+             nn.Dropout(0.3),
+             nn.Linear(512, embedding_dim)
+         )
+
+         self.direction_head = nn.Sequential(
+             nn.Linear(embedding_dim, 64),
+             nn.ReLU(inplace=True),
+             nn.Dropout(0.2),
+             nn.Linear(64, num_classes)
+         )
+
+         self.embedding_dim = embedding_dim
+
+     def forward(self, x, return_embedding=False):
+         features = self.backbone.features(x)
+         features = self.coord_attention(features)
+         features = self.backbone.avgpool(features)
+         features = features.flatten(1)
+
+         embedding = self.embedding_head(features)
+         embedding = F.normalize(embedding, p=2, dim=1)
+
+         direction = self.direction_head(embedding)
+
+         if return_embedding:
+             return direction, embedding
+         return direction
+
+
+ @dataclass
+ class Detection:
+     bbox: np.ndarray
+     class_id: int
+     class_name: str
+     confidence: float
+     frame_id: int
+
+
+ @dataclass
+ class OperatorSlot:
+     operator_id: int
+     operator_name: str
+     track_id: int
+
+     active: bool = False
+     class_id: int = -1
+     class_name: str = ""
+     bbox: np.ndarray = None
+     confidence: float = 0.0
+     embedding: np.ndarray = None
+
+     last_seen_frame: int = -1
+     total_detections: int = 0
+     bbox_history: List[np.ndarray] = field(default_factory=list)
+     class_history: List[int] = field(default_factory=list)
+
+     def update(self, detection: Detection, embedding: np.ndarray, frame_id: int):
+         self.active = True
+         self.bbox = detection.bbox
+         self.class_id = detection.class_id
+         self.class_name = detection.class_name
+         self.confidence = detection.confidence
+         self.embedding = embedding
+         self.last_seen_frame = frame_id
+         self.total_detections += 1
+
+         self.bbox_history.append(detection.bbox.copy())
+         self.class_history.append(detection.class_id)
+
+         if len(self.bbox_history) > 100:
+             self.bbox_history.pop(0)
+             self.class_history.pop(0)
+
+     def mark_inactive(self):
+         self.active = False
+
+     def frames_since_seen(self, current_frame: int) -> int:
+         if self.last_seen_frame < 0:
+             return float('inf')
+         return current_frame - self.last_seen_frame
+
+
+ class OperatorBasedTracker:
+     MAX_GRASPERS = 3
+     GRASPER_CLASS_ID = 0
+     SINGLE_INSTANCE_CLASSES = {1, 2, 3, 4, 5, 6}
+
+     def __init__(
+         self,
+         direction_model: DirectionEstimator = None,
+         max_inactive_frames: int = 300,
+         iou_threshold: float = 0.3,
+         direction_confidence_threshold: float = 0.5,
+         device: str = "cuda"
+     ):
+         self.direction_model = direction_model
+         self.max_inactive_frames = max_inactive_frames
+         self.iou_threshold = iou_threshold
+         self.direction_confidence_threshold = direction_confidence_threshold
+         self.device = device
+
+         self.grasper_slots: List[OperatorSlot] = []
+         self.class_slots: Dict[int, OperatorSlot] = {}
+
+         self.next_track_id = 1
+         self.frame_count = 0
+
+         self._initialize_slots()
+
+         if self.direction_model is not None:
+             self.direction_model.to(device)
+             self.direction_model.eval()
+
+     def _initialize_slots(self):
+         for i in range(self.MAX_GRASPERS):
+             slot = OperatorSlot(
+                 operator_id=-1,
+                 operator_name=f"grasper_{i+1}",
+                 track_id=self.next_track_id
+             )
+             slot.class_id = self.GRASPER_CLASS_ID
+             slot.class_name = 'grasper'
+             self.next_track_id += 1
+             self.grasper_slots.append(slot)
+
+         for class_id in self.SINGLE_INSTANCE_CLASSES:
+             slot = OperatorSlot(
+                 operator_id=3,
+                 operator_name=f"CLASS_{CLASS_NAMES[class_id]}",
+                 track_id=self.next_track_id
+             )
+             slot.class_id = class_id
+             slot.class_name = CLASS_NAMES[class_id]
+             self.next_track_id += 1
+             self.class_slots[class_id] = slot
+
+     def _get_direction_prediction(self, frame: np.ndarray, bbox: np.ndarray):
+         if self.direction_model is None:
+             return 3, np.array([0.25, 0.25, 0.25, 0.25])
+
+         x1, y1, x2, y2 = bbox.astype(int)
+         h, w = frame.shape[:2]
+
+         pad_x = int((x2 - x1) * 0.3)
+         pad_y = int((y2 - y1) * 0.5)
+
+         x1 = max(0, x1 - pad_x)
+         y1 = max(0, y1 - pad_y)
+         x2 = min(w, x2 + pad_x)
+         y2 = min(h, y2 + pad_y)
+
+         crop = frame[y1:y2, x1:x2]
+         if crop.size == 0:
+             return 3, np.array([0.25, 0.25, 0.25, 0.25])
+
+         crop = cv2.resize(crop, (224, 224))
+         crop = crop.astype(np.float32) / 255.0
+         crop = (crop - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
+         crop = torch.from_numpy(crop).permute(2, 0, 1).unsqueeze(0).float().to(self.device)
+
+         with torch.no_grad():
+             logits, embedding = self.direction_model(crop, return_embedding=True)
+             probs = F.softmax(logits, dim=1).cpu().numpy()[0]
+
+         return np.argmax(probs), probs
+
+     def _compute_iou(self, bbox1: np.ndarray, bbox2: np.ndarray) -> float:
+         if bbox1 is None or bbox2 is None:
+             return 0.0
+
+         x1 = max(bbox1[0], bbox2[0])
+         y1 = max(bbox1[1], bbox2[1])
+         x2 = min(bbox1[2], bbox2[2])
+         y2 = min(bbox1[3], bbox2[3])
+
+         inter = max(0, x2 - x1) * max(0, y2 - y1)
+         area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
+         area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
+         union = area1 + area2 - inter
+
+         return inter / (union + 1e-6)
+
+     def _find_best_slot(self, detection: Detection, predicted_op: int, direction_probs: np.ndarray) -> Optional[OperatorSlot]:
+         class_id = detection.class_id
+
+         if class_id in self.SINGLE_INSTANCE_CLASSES:
+             slot = self.class_slots.get(class_id)
+             if slot:
+                 recency = slot.frames_since_seen(self.frame_count)
+                 if not slot.active and recency >= 75:
+                     slot.track_id = self.next_track_id
+                     self.next_track_id += 1
+                 return slot
+
+         if class_id == self.GRASPER_CLASS_ID:
+             direction_confident = predicted_op < 3 and direction_probs[predicted_op] > self.direction_confidence_threshold
+
+             best_slot = None
+             best_score = -1
+             for slot in self.grasper_slots:
+                 if slot.bbox is None:
+                     continue
+
+                 recency = slot.frames_since_seen(self.frame_count)
+                 if recency >= 75:
+                     continue
+
+                 iou = self._compute_iou(detection.bbox, slot.bbox)
+
+                 det_center = (detection.bbox[:2] + detection.bbox[2:]) / 2
+                 slot_center = (slot.bbox[:2] + slot.bbox[2:]) / 2
+                 dist = np.linalg.norm(det_center - slot_center)
+
+                 if iou > self.iou_threshold:
+                     score = iou + (0.2 if slot.operator_id == predicted_op else 0)
+                 elif dist < 150 and recency < 30:
+                     score = 0.1 + (0.2 if slot.operator_id == predicted_op else 0)
+                 else:
+                     continue
+
+                 if score > best_score:
+                     best_score = score
+                     best_slot = slot
+
+             if best_slot:
+                 return best_slot
+
+             if direction_confident:
+                 for slot in self.grasper_slots:
+                     if slot.active or slot.bbox is None:
+                         continue
+                     if slot.operator_id == predicted_op and slot.frames_since_seen(self.frame_count) < 75:
+                         return slot
+
+             if not direction_confident:
+                 for slot in self.grasper_slots:
+                     if slot.active or slot.bbox is None:
+                         continue
+                     if slot.frames_since_seen(self.frame_count) < 30:
+                         det_center = (detection.bbox[:2] + detection.bbox[2:]) / 2
+                         slot_center = (slot.bbox[:2] + slot.bbox[2:]) / 2
+                         dist = np.linalg.norm(det_center - slot_center)
+                         if dist < 100:
+                             return slot
+
+             for slot in self.grasper_slots:
+                 if not slot.active:
+                     slot.track_id = self.next_track_id
+                     self.next_track_id += 1
+                     return slot
+
+             worst_slot = None
+             worst_iou = 1.0
+             for slot in self.grasper_slots:
+                 iou = self._compute_iou(detection.bbox, slot.bbox)
+                 if iou < worst_iou:
+                     worst_iou = iou
+                     worst_slot = slot
+
+             if worst_slot:
+                 worst_slot.track_id = self.next_track_id
+                 self.next_track_id += 1
+                 return worst_slot
+
+         return None
+
+     def update(self, frame: np.ndarray, detections: List[Detection]) -> List[OperatorSlot]:
+         self.frame_count += 1
+
+         all_slots = self.grasper_slots + list(self.class_slots.values())
+         for slot in all_slots:
+             if slot.active and slot.frames_since_seen(self.frame_count) > 150:
+                 slot.mark_inactive()
+
+         if len(detections) == 0:
+             return self._get_active_slots()
+
+         detection_info = []
+         for det in detections:
+             pred_op, probs = self._get_direction_prediction(frame, det.bbox)
+             detection_info.append((det, pred_op, probs))
+
+         detection_info.sort(key=lambda x: -x[0].confidence)
+
+         assigned_slots = set()
+
+         for det, pred_op, probs in detection_info:
+             slot = self._find_best_slot(det, pred_op, probs)
+
+             if slot and slot.track_id not in assigned_slots:
+                 slot.update(det, probs, self.frame_count)
+                 if det.class_id == self.GRASPER_CLASS_ID:
+                     slot.operator_id = pred_op
+                 assigned_slots.add(slot.track_id)
+
+         return self._get_active_slots()
+
+     def _get_active_slots(self) -> List[OperatorSlot]:
+         active = []
+         for slot in self.grasper_slots:
+             if slot.active and slot.last_seen_frame == self.frame_count:
+                 active.append(slot)
+         for slot in self.class_slots.values():
+             if slot.active and slot.last_seen_frame == self.frame_count:
+                 active.append(slot)
+         return active
+
+     def reset(self):
+         self.grasper_slots = []
+         self.class_slots = {}
+         self.next_track_id = 1
+         self.frame_count = 0
+         self._initialize_slots()
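
For reference, a minimal shape check of the direction estimator defined above, run on random input (illustrative only; the real model expects ImageNet-normalized 224×224 tool crops, as prepared in `_get_direction_prediction`):

```python
# Hedged usage sketch for DirectionEstimator / CoordinateAttention from tracker.py.
import torch
from tracker import DirectionEstimator, OPERATORS

model = DirectionEstimator(num_classes=4, pretrained=False).eval()
crop = torch.randn(1, 3, 224, 224)                 # stand-in for a normalized tool crop

with torch.no_grad():
    logits, embedding = model(crop, return_embedding=True)

print(logits.shape)                                # torch.Size([1, 4]) -> MSLH/MSRH/ASRH/NULL
print(embedding.shape, embedding.norm(dim=1))      # torch.Size([1, 128]), L2-normalized (~1.0)
print(OPERATORS[int(logits.argmax(dim=1))])        # predicted operator label
```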