Spaces:

BiasLab2025
/

perception

Paused

Zhen Ye committed 5 days ago

Commit

d2acfa9

1 Parent(s): 0effcd5

fix: Remove chat emojis, improve tracker GPT metadata sync

- Remove emojis from threat chat UI (use text labels YOU/TAC/SYS)
- Fix GPT reasoning to use track_id instead of index-based IDs
- Add inject_metadata() to ByteTracker for post-update GPT sync
- Add GPT_SYNC_KEYS for all threat intelligence fields
- Reset track ID counter on new ByteTracker instance
- Minor fixes to overlays, cards, and main.js

Files changed (8) hide show

frontend/index.html +2 -2
frontend/js/main.js +1 -1
frontend/js/ui/cards.js +3 -1
frontend/js/ui/chat.js +1 -1
frontend/js/ui/overlays.js +26 -5
inference.py +22 -42
utils/gpt_reasoning.py +1 -1
utils/tracker.py +43 -2

frontend/index.html CHANGED Viewed

@@ -198,13 +198,13 @@
             <!-- Threat Chat Panel -->
             <div class="panel panel-chat" id="chatPanel">
               <h3>
-                <span>🎖️ Threat Analyst Chat</span>
                 <button class="collapse-btn" id="chatToggle" style="font-size: 0.75rem;">▲ Close Chat</button>
               </h3>
               <div class="chat-container">
                 <div class="chat-messages" id="chatMessages">
                   <div class="chat-message chat-system">
-                    <span class="chat-icon">ℹ️</span>
                     <span class="chat-content">Run detection first, then ask questions about detected threats.</span>
                   </div>
                 </div>

             <!-- Threat Chat Panel -->
             <div class="panel panel-chat" id="chatPanel">
               <h3>
+                <span>Threat Analyst Chat</span>
                 <button class="collapse-btn" id="chatToggle" style="font-size: 0.75rem;">▲ Close Chat</button>
               </h3>
               <div class="chat-container">
                 <div class="chat-messages" id="chatMessages">
                   <div class="chat-message chat-system">
+                    <span class="chat-icon">SYS</span>
                     <span class="chat-content">Run detection first, then ask questions about detected threats.</span>
                   </div>
                 </div>

frontend/js/main.js CHANGED Viewed

@@ -499,7 +499,7 @@ document.addEventListener("DOMContentLoaded", () => {
     function processFirstFrameDetections(dets) {
         state.detections = dets.map((d, i) => {
-            const id = `T${String(i + 1).padStart(2, "0")}`;
             const ap = defaultAimpoint(d.label || d.class);
             const bbox = d.bbox
                 ? { x: d.bbox[0], y: d.bbox[1], w: d.bbox[2] - d.bbox[0], h: d.bbox[3] - d.bbox[1] }

     function processFirstFrameDetections(dets) {
         state.detections = dets.map((d, i) => {
+            const id = d.track_id || `T${String(i + 1).padStart(2, "0")}`;
             const ap = defaultAimpoint(d.label || d.class);
             const bbox = d.bbox
                 ? { x: d.bbox[0], y: d.bbox[1], w: d.bbox[2] - d.bbox[0], h: d.bbox[3] - d.bbox[1] }

frontend/js/ui/cards.js CHANGED Viewed

@@ -17,7 +17,9 @@ APP.ui.cards.renderFrameTrackList = function () {
         return;
     }
-    dets.forEach((det, i) => {
         const id = det.id || `T${String(i + 1).padStart(2, '0')}`;
         let rangeStr = "---";

         return;
     }
+    const sorted = [...dets].sort((a, b) => (b.threat_level_score || 0) - (a.threat_level_score || 0));
+    sorted.forEach((det, i) => {
         const id = det.id || `T${String(i + 1).padStart(2, '0')}`;
         let rangeStr = "---";

frontend/js/ui/chat.js CHANGED Viewed

@@ -110,7 +110,7 @@
         // Format content with line breaks
         const formatted = content.replace(/\n/g, "<br>");
-        const icon = role === "user" ? "👤" : role === "assistant" ? "🎖️" : "⚠️";
         msgDiv.innerHTML = `<span class="chat-icon">${icon}</span><span class="chat-content">${formatted}</span>`;
         chatMessages.appendChild(msgDiv);

         // Format content with line breaks
         const formatted = content.replace(/\n/g, "<br>");
+        const icon = role === "user" ? "YOU" : role === "assistant" ? "TAC" : "SYS";
         msgDiv.innerHTML = `<span class="chat-icon">${icon}</span><span class="chat-content">${formatted}</span>`;
         chatMessages.appendChild(msgDiv);

frontend/js/ui/overlays.js CHANGED Viewed

@@ -88,6 +88,19 @@ APP.ui.overlays.render = function (canvasId, trackSource) {
         ctx.shadowBlur = isSel ? 18 : 10;
         roundRect(ctx, b.x, b.y, b.w, b.h, 10, false, true);
         // pseudo mask glow
         ctx.shadowBlur = 0;
         const g = ctx.createRadialGradient(b.x + b.w * 0.5, b.y + b.h * 0.5, 10, b.x + b.w * 0.5, b.y + b.h * 0.5, Math.max(b.w, b.h) * 0.75);
@@ -130,14 +143,22 @@ APP.ui.overlays.render = function (canvasId, trackSource) {
 };
 APP.ui.overlays.renderFrameOverlay = function () {
     const { $ } = APP.core.utils;
-    // User request: No overlays on first frame (Tab 1)
     const canvas = $("#frameOverlay");
-    if (canvas) {
-        const ctx = canvas.getContext("2d");
-        ctx.clearRect(0, 0, canvas.width, canvas.height);
     }
-    // APP.ui.overlays.render("frameOverlay", state.detections);
 };
 APP.ui.overlays.renderEngageOverlay = function () {

         ctx.shadowBlur = isSel ? 18 : 10;
         roundRect(ctx, b.x, b.y, b.w, b.h, 10, false, true);
+        // Animated pulsing glow for selected box
+        if (isSel) {
+            const pulse = Math.sin(t * 3) * 0.5 + 0.5; // 0..1 pulsing
+            ctx.save();
+            ctx.strokeStyle = `rgba(34,211,238,${0.25 + pulse * 0.35})`;
+            ctx.lineWidth = 6;
+            ctx.shadowColor = `rgba(34,211,238,${0.3 + pulse * 0.4})`;
+            ctx.shadowBlur = 28 + pulse * 20;
+            roundRect(ctx, b.x, b.y, b.w, b.h, 10, false, true);
+            ctx.restore();
+            ctx.lineWidth = 3; // restore for subsequent drawing
+        }
         // pseudo mask glow
         ctx.shadowBlur = 0;
         const g = ctx.createRadialGradient(b.x + b.w * 0.5, b.y + b.h * 0.5, 10, b.x + b.w * 0.5, b.y + b.h * 0.5, Math.max(b.w, b.h) * 0.75);
 };
 APP.ui.overlays.renderFrameOverlay = function () {
+    const { state } = APP.core;
     const { $ } = APP.core.utils;
     const canvas = $("#frameOverlay");
+    if (!canvas) return;
+    // Only show overlay for the selected detection
+    if (state.selectedId) {
+        const sel = (state.detections || []).filter(d => d.id === state.selectedId);
+        if (sel.length) {
+            APP.ui.overlays.render("frameOverlay", sel);
+            return;
+        }
     }
+    // Nothing selected — clear
+    const ctx = canvas.getContext("2d");
+    ctx.clearRect(0, 0, canvas.width, canvas.height);
 };
 APP.ui.overlays.renderEngageOverlay = function () {

inference.py CHANGED Viewed

@@ -773,7 +773,10 @@ def process_first_frame(
             # GPT returns { "T01": { "distance_m": ..., "direction": ... } }
             # Detections are list of dicts. We assume T01 maps to index 0, T02 to index 1...
             for i, det in enumerate(detections):
-                 # ID format matches what we constructed in gpt_reasoning.py
                  obj_id = f"T{str(i+1).zfill(2)}"
                  if obj_id in gpt_results:
                      info = gpt_results[obj_id]
@@ -1106,56 +1109,33 @@ def run_inference(
                         # Write next_idx
                         p_frame, dets = buffer.pop(next_idx)
                         # --- GPT ESTIMATION (Frame 0 Only) ---
                         if next_idx == 0 and enable_gpt and dets:
                             try:
                                 logging.info("Running GPT estimation for video start (Frame 0)...")
                                 with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
-                                    cv2.imwrite(tmp.name, p_frame) # Use processed frame (boxes not yet drawn)
-                                    # Wait, p_frame might have heatmaps if depth enabled? No, draw_boxes comes later.
-                                    # Actually, colorize_depth_map might have happened in worker.
-                                    # But raw image is better? We don't have raw image here easily without stashing.
-                                    # p_frame is 'processed'. If depth map enabled, it's a heatmap. Not good for GPT.
-                                    # GPT needs RGB image.
-                                    # Worker: processed = frame.copy() -> colorize -> draw_boxes (removed).
-                                    # So processed is potentially modified.
-                                    # Ideally we want original.
-                                    # But let's assume for now processed is fine (if depth disabled) or GPT can handle it.
-                                    # If depth is enabled, processed is a heatmap. GPT will fail to see car color/details.
-                                    # FIX: We need access to original frame?
-                                    # worker sends (idx, processed, detections).
-                                    # It does NOT send original frame.
-                                    # We should change worker to send original? Or assume GPT runs on processed?
-                                    # If processed is heatmap, it's bad.
-                                    # But User Objective says "legacy depth estimation" is optional/deprecated.
-                                    # If depth_estimator_name is None, processed is just frame.
-                                    gpt_res = estimate_distance_gpt(tmp.name, dets)
                                     os.remove(tmp.name)
-                                    # Merge
-                                    # Helper to match IDs?
-                                    # estimate_distance_gpt expects us to pass detections list, output keyed by T01..
-                                    # But detections don't have IDs yet! SimpleTracker assigns them.
-                                    # We assign temporary IDs T01.. based on index for GPT matching?
-                                    # gpt_distance.py generates IDs if not present.
-                                    # Let's inspect gpt_distance.py... assume it matches by index T01, T02...
-                                    for i, d in enumerate(dets):
-                                        oid = f"T{str(i+1).zfill(2)}"
-                                        if oid in gpt_res:
                                             d.update(gpt_res[oid])
                             except Exception as e:
                                 logging.error("GPT failed for Frame 0: %s", e)
-                        # --- SEQUENTIAL TRACKING ---
-                        # Update tracker with current frame detections
-                        # ByteTracker returns the list of ACTIVE tracks with IDs
-                        dets = tracker.update(dets)
-                        speed_est.estimate(dets)
                         # --- RENDER BOXES & OVERLAYS ---
                         # We need to convert list of dicts back to boxes array for draw_boxes

             # GPT returns { "T01": { "distance_m": ..., "direction": ... } }
             # Detections are list of dicts. We assume T01 maps to index 0, T02 to index 1...
             for i, det in enumerate(detections):
+                 # Index-based IDs are intentional here: no tracker runs for first-frame
+                 # preview, so GPT, inference merge, and frontend all use the same
+                 # index-based scheme (T01=index 0, T02=index 1, ...), keeping it
+                 # self-consistent. The video pipeline uses real ByteTracker IDs instead.
                  obj_id = f"T{str(i+1).zfill(2)}"
                  if obj_id in gpt_results:
                      info = gpt_results[obj_id]
                         # Write next_idx
                         p_frame, dets = buffer.pop(next_idx)
+                        # --- SEQUENTIAL TRACKING ---
+                        # Run tracker FIRST so detections get real track_id from ByteTracker
+                        dets = tracker.update(dets)
+                        speed_est.estimate(dets)
                         # --- GPT ESTIMATION (Frame 0 Only) ---
                         if next_idx == 0 and enable_gpt and dets:
                             try:
                                 logging.info("Running GPT estimation for video start (Frame 0)...")
                                 with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
+                                    cv2.imwrite(tmp.name, p_frame)
+                                    gpt_res = estimate_threat_gpt(tmp.name, dets)
                                     os.remove(tmp.name)
+                                    # Merge using real track_id assigned by ByteTracker
+                                    for d in dets:
+                                        oid = d.get('track_id')
+                                        if oid and oid in gpt_res:
                                             d.update(gpt_res[oid])
+                                # Push GPT data back into tracker's internal STrack objects
+                                # so it persists across subsequent frames via _sync_data
+                                tracker.inject_metadata(dets)
                             except Exception as e:
                                 logging.error("GPT failed for Frame 0: %s", e)
                         # --- RENDER BOXES & OVERLAYS ---
                         # We need to convert list of dicts back to boxes array for draw_boxes

utils/gpt_reasoning.py CHANGED Viewed

@@ -36,7 +36,7 @@ def estimate_threat_gpt(
     det_summary = []
     for i, det in enumerate(detections):
         # UI uses T01, T02... logic usually matches index + 1
-        obj_id = f"T{str(i+1).zfill(2)}"
         bbox = det.get("bbox", [])
         label = det.get("label", "object")
         det_summary.append(f"- ID: {obj_id}, Classification Hint: {label}, BBox: {bbox}")

     det_summary = []
     for i, det in enumerate(detections):
         # UI uses T01, T02... logic usually matches index + 1
+        obj_id = det.get("track_id") or det.get("id") or f"T{str(i+1).zfill(2)}"
         bbox = det.get("bbox", [])
         label = det.get("label", "object")
         det_summary.append(f"- ID: {obj_id}, Classification Hint: {label}, BBox: {bbox}")

utils/tracker.py CHANGED Viewed

@@ -195,6 +195,21 @@ class KalmanFilter:
         return ret
 class STrack:
     """
     Single object track. Wrapper around KalmanFilter state.
@@ -329,9 +344,15 @@ class STrack:
         STrack._count += 1
         return STrack._count
 class ByteTracker:
     def __init__(self, track_thresh=0.5, track_buffer=30, match_thresh=0.8, frame_rate=30):
         self.track_thresh = track_thresh
         self.track_buffer = track_buffer
         self.match_thresh = match_thresh
@@ -520,12 +541,32 @@ class ByteTracker:
         """Propagate attributes like GPT data between track and detection."""
         # 1. From Source to Track (Update)
         source_data = det_source.original_data if hasattr(det_source, 'original_data') else {}
-        for k in ['gpt_distance_m', 'gpt_direction', 'gpt_description']:
             if k in source_data:
                 track.gpt_data[k] = source_data[k]
         # 2. From Track to Source (Forward fill logic handled in output construction)
 # --- Helper Functions ---

         return ret
+GPT_SYNC_KEYS = frozenset({
+    # Legacy fields
+    "gpt_distance_m", "gpt_direction", "gpt_description", "gpt_raw",
+    # Threat intelligence
+    "threat_level_score", "threat_classification", "weapon_readiness",
+    # Naval assessment fields from schema
+    "vessel_category", "specific_class", "identity_markers", "flag_state",
+    "visible_weapons", "sensor_profile", "motion_status", "wake_description",
+    "aspect", "range_estimation_nm", "bearing_clock", "deck_activity",
+    "special_features", "tactical_intent",
+    # Computed fields
+    "distance_m", "direction", "description",
+})
 class STrack:
     """
     Single object track. Wrapper around KalmanFilter state.
         STrack._count += 1
         return STrack._count
+    @staticmethod
+    def reset_count():
+        """Reset track ID counter so the next video starts at T01."""
+        STrack._count = 0
 class ByteTracker:
     def __init__(self, track_thresh=0.5, track_buffer=30, match_thresh=0.8, frame_rate=30):
+        STrack.reset_count()
         self.track_thresh = track_thresh
         self.track_buffer = track_buffer
         self.match_thresh = match_thresh
         """Propagate attributes like GPT data between track and detection."""
         # 1. From Source to Track (Update)
         source_data = det_source.original_data if hasattr(det_source, 'original_data') else {}
+        for k in GPT_SYNC_KEYS:
             if k in source_data:
                 track.gpt_data[k] = source_data[k]
         # 2. From Track to Source (Forward fill logic handled in output construction)
+    def inject_metadata(self, tracked_dets):
+        """Push metadata from post-processed detection dicts back into internal STrack objects.
+        Needed because GPT results are added to detection dicts *after* tracker.update()
+        returns, so the tracker's internal state doesn't have GPT data unless we
+        explicitly push it back in.
+        """
+        meta_by_tid = {}
+        for d in tracked_dets:
+            tid = d.get('track_id')
+            if not tid:
+                continue
+            meta = {k: d[k] for k in GPT_SYNC_KEYS if k in d}
+            if meta:
+                meta_by_tid[tid] = meta
+        for track in self.tracked_stracks:
+            tid_str = f"T{str(track.track_id).zfill(2)}"
+            if tid_str in meta_by_tid:
+                track.gpt_data.update(meta_by_tid[tid_str])
 # --- Helper Functions ---