Spaces:
Paused
Paused
Zhen Ye
committed on
Commit
·
d2acfa9
1
Parent(s):
0effcd5
fix: Remove chat emojis, improve tracker GPT metadata sync
Browse files
- Remove emojis from threat chat UI (use text labels YOU/TAC/SYS)
- Fix GPT reasoning to use track_id instead of index-based IDs
- Add inject_metadata() to ByteTracker for post-update GPT sync
- Add GPT_SYNC_KEYS for all threat intelligence fields
- Reset track ID counter on new ByteTracker instance
- Minor fixes to overlays, cards, and main.js
- frontend/index.html +2 -2
- frontend/js/main.js +1 -1
- frontend/js/ui/cards.js +3 -1
- frontend/js/ui/chat.js +1 -1
- frontend/js/ui/overlays.js +26 -5
- inference.py +22 -42
- utils/gpt_reasoning.py +1 -1
- utils/tracker.py +43 -2
frontend/index.html
CHANGED
|
@@ -198,13 +198,13 @@
|
|
| 198 |
<!-- Threat Chat Panel -->
|
| 199 |
<div class="panel panel-chat" id="chatPanel">
|
| 200 |
<h3>
|
| 201 |
-
<span
|
| 202 |
<button class="collapse-btn" id="chatToggle" style="font-size: 0.75rem;">▲ Close Chat</button>
|
| 203 |
</h3>
|
| 204 |
<div class="chat-container">
|
| 205 |
<div class="chat-messages" id="chatMessages">
|
| 206 |
<div class="chat-message chat-system">
|
| 207 |
-
<span class="chat-icon"
|
| 208 |
<span class="chat-content">Run detection first, then ask questions about detected threats.</span>
|
| 209 |
</div>
|
| 210 |
</div>
|
|
|
|
| 198 |
<!-- Threat Chat Panel -->
|
| 199 |
<div class="panel panel-chat" id="chatPanel">
|
| 200 |
<h3>
|
| 201 |
+
<span>Threat Analyst Chat</span>
|
| 202 |
<button class="collapse-btn" id="chatToggle" style="font-size: 0.75rem;">▲ Close Chat</button>
|
| 203 |
</h3>
|
| 204 |
<div class="chat-container">
|
| 205 |
<div class="chat-messages" id="chatMessages">
|
| 206 |
<div class="chat-message chat-system">
|
| 207 |
+
<span class="chat-icon">SYS</span>
|
| 208 |
<span class="chat-content">Run detection first, then ask questions about detected threats.</span>
|
| 209 |
</div>
|
| 210 |
</div>
|
frontend/js/main.js
CHANGED
|
@@ -499,7 +499,7 @@ document.addEventListener("DOMContentLoaded", () => {
|
|
| 499 |
|
| 500 |
function processFirstFrameDetections(dets) {
|
| 501 |
state.detections = dets.map((d, i) => {
|
| 502 |
-
const id = `T${String(i + 1).padStart(2, "0")}`;
|
| 503 |
const ap = defaultAimpoint(d.label || d.class);
|
| 504 |
const bbox = d.bbox
|
| 505 |
? { x: d.bbox[0], y: d.bbox[1], w: d.bbox[2] - d.bbox[0], h: d.bbox[3] - d.bbox[1] }
|
|
|
|
| 499 |
|
| 500 |
function processFirstFrameDetections(dets) {
|
| 501 |
state.detections = dets.map((d, i) => {
|
| 502 |
+
const id = d.track_id || `T${String(i + 1).padStart(2, "0")}`;
|
| 503 |
const ap = defaultAimpoint(d.label || d.class);
|
| 504 |
const bbox = d.bbox
|
| 505 |
? { x: d.bbox[0], y: d.bbox[1], w: d.bbox[2] - d.bbox[0], h: d.bbox[3] - d.bbox[1] }
|
frontend/js/ui/cards.js
CHANGED
|
@@ -17,7 +17,9 @@ APP.ui.cards.renderFrameTrackList = function () {
|
|
| 17 |
return;
|
| 18 |
}
|
| 19 |
|
| 20 |
-
dets.
|
|
|
|
|
|
|
| 21 |
const id = det.id || `T${String(i + 1).padStart(2, '0')}`;
|
| 22 |
|
| 23 |
let rangeStr = "---";
|
|
|
|
| 17 |
return;
|
| 18 |
}
|
| 19 |
|
| 20 |
+
const sorted = [...dets].sort((a, b) => (b.threat_level_score || 0) - (a.threat_level_score || 0));
|
| 21 |
+
|
| 22 |
+
sorted.forEach((det, i) => {
|
| 23 |
const id = det.id || `T${String(i + 1).padStart(2, '0')}`;
|
| 24 |
|
| 25 |
let rangeStr = "---";
|
frontend/js/ui/chat.js
CHANGED
|
@@ -110,7 +110,7 @@
|
|
| 110 |
// Format content with line breaks
|
| 111 |
const formatted = content.replace(/\n/g, "<br>");
|
| 112 |
|
| 113 |
-
const icon = role === "user" ? "
|
| 114 |
msgDiv.innerHTML = `<span class="chat-icon">${icon}</span><span class="chat-content">${formatted}</span>`;
|
| 115 |
|
| 116 |
chatMessages.appendChild(msgDiv);
|
|
|
|
| 110 |
// Format content with line breaks
|
| 111 |
const formatted = content.replace(/\n/g, "<br>");
|
| 112 |
|
| 113 |
+
const icon = role === "user" ? "YOU" : role === "assistant" ? "TAC" : "SYS";
|
| 114 |
msgDiv.innerHTML = `<span class="chat-icon">${icon}</span><span class="chat-content">${formatted}</span>`;
|
| 115 |
|
| 116 |
chatMessages.appendChild(msgDiv);
|
frontend/js/ui/overlays.js
CHANGED
|
@@ -88,6 +88,19 @@ APP.ui.overlays.render = function (canvasId, trackSource) {
|
|
| 88 |
ctx.shadowBlur = isSel ? 18 : 10;
|
| 89 |
roundRect(ctx, b.x, b.y, b.w, b.h, 10, false, true);
|
| 90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
// pseudo mask glow
|
| 92 |
ctx.shadowBlur = 0;
|
| 93 |
const g = ctx.createRadialGradient(b.x + b.w * 0.5, b.y + b.h * 0.5, 10, b.x + b.w * 0.5, b.y + b.h * 0.5, Math.max(b.w, b.h) * 0.75);
|
|
@@ -130,14 +143,22 @@ APP.ui.overlays.render = function (canvasId, trackSource) {
|
|
| 130 |
};
|
| 131 |
|
| 132 |
APP.ui.overlays.renderFrameOverlay = function () {
|
|
|
|
| 133 |
const { $ } = APP.core.utils;
|
| 134 |
-
// User request: No overlays on first frame (Tab 1)
|
| 135 |
const canvas = $("#frameOverlay");
|
| 136 |
-
if (canvas)
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
}
|
| 140 |
-
//
|
|
|
|
|
|
|
| 141 |
};
|
| 142 |
|
| 143 |
APP.ui.overlays.renderEngageOverlay = function () {
|
|
|
|
| 88 |
ctx.shadowBlur = isSel ? 18 : 10;
|
| 89 |
roundRect(ctx, b.x, b.y, b.w, b.h, 10, false, true);
|
| 90 |
|
| 91 |
+
// Animated pulsing glow for selected box
|
| 92 |
+
if (isSel) {
|
| 93 |
+
const pulse = Math.sin(t * 3) * 0.5 + 0.5; // 0..1 pulsing
|
| 94 |
+
ctx.save();
|
| 95 |
+
ctx.strokeStyle = `rgba(34,211,238,${0.25 + pulse * 0.35})`;
|
| 96 |
+
ctx.lineWidth = 6;
|
| 97 |
+
ctx.shadowColor = `rgba(34,211,238,${0.3 + pulse * 0.4})`;
|
| 98 |
+
ctx.shadowBlur = 28 + pulse * 20;
|
| 99 |
+
roundRect(ctx, b.x, b.y, b.w, b.h, 10, false, true);
|
| 100 |
+
ctx.restore();
|
| 101 |
+
ctx.lineWidth = 3; // restore for subsequent drawing
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
// pseudo mask glow
|
| 105 |
ctx.shadowBlur = 0;
|
| 106 |
const g = ctx.createRadialGradient(b.x + b.w * 0.5, b.y + b.h * 0.5, 10, b.x + b.w * 0.5, b.y + b.h * 0.5, Math.max(b.w, b.h) * 0.75);
|
|
|
|
| 143 |
};
|
| 144 |
|
| 145 |
APP.ui.overlays.renderFrameOverlay = function () {
|
| 146 |
+
const { state } = APP.core;
|
| 147 |
const { $ } = APP.core.utils;
|
|
|
|
| 148 |
const canvas = $("#frameOverlay");
|
| 149 |
+
if (!canvas) return;
|
| 150 |
+
|
| 151 |
+
// Only show overlay for the selected detection
|
| 152 |
+
if (state.selectedId) {
|
| 153 |
+
const sel = (state.detections || []).filter(d => d.id === state.selectedId);
|
| 154 |
+
if (sel.length) {
|
| 155 |
+
APP.ui.overlays.render("frameOverlay", sel);
|
| 156 |
+
return;
|
| 157 |
+
}
|
| 158 |
}
|
| 159 |
+
// Nothing selected — clear
|
| 160 |
+
const ctx = canvas.getContext("2d");
|
| 161 |
+
ctx.clearRect(0, 0, canvas.width, canvas.height);
|
| 162 |
};
|
| 163 |
|
| 164 |
APP.ui.overlays.renderEngageOverlay = function () {
|
inference.py
CHANGED
|
@@ -773,7 +773,10 @@ def process_first_frame(
|
|
| 773 |
# GPT returns { "T01": { "distance_m": ..., "direction": ... } }
|
| 774 |
# Detections are list of dicts. We assume T01 maps to index 0, T02 to index 1...
|
| 775 |
for i, det in enumerate(detections):
|
| 776 |
-
#
|
|
|
|
|
|
|
|
|
|
| 777 |
obj_id = f"T{str(i+1).zfill(2)}"
|
| 778 |
if obj_id in gpt_results:
|
| 779 |
info = gpt_results[obj_id]
|
|
@@ -1106,56 +1109,33 @@ def run_inference(
|
|
| 1106 |
|
| 1107 |
# Write next_idx
|
| 1108 |
p_frame, dets = buffer.pop(next_idx)
|
| 1109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1110 |
# --- GPT ESTIMATION (Frame 0 Only) ---
|
| 1111 |
if next_idx == 0 and enable_gpt and dets:
|
| 1112 |
try:
|
| 1113 |
logging.info("Running GPT estimation for video start (Frame 0)...")
|
| 1114 |
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
|
| 1115 |
-
cv2.imwrite(tmp.name, p_frame)
|
| 1116 |
-
|
| 1117 |
-
# Actually, colorize_depth_map might have happened in worker.
|
| 1118 |
-
# But raw image is better? We don't have raw image here easily without stashing.
|
| 1119 |
-
# p_frame is 'processed'. If depth map enabled, it's a heatmap. Not good for GPT.
|
| 1120 |
-
# GPT needs RGB image.
|
| 1121 |
-
# Worker: processed = frame.copy() -> colorize -> draw_boxes (removed).
|
| 1122 |
-
# So processed is potentially modified.
|
| 1123 |
-
# Ideally we want original.
|
| 1124 |
-
# But let's assume for now processed is fine (if depth disabled) or GPT can handle it.
|
| 1125 |
-
# If depth is enabled, processed is a heatmap. GPT will fail to see car color/details.
|
| 1126 |
-
|
| 1127 |
-
# FIX: We need access to original frame?
|
| 1128 |
-
# worker sends (idx, processed, detections).
|
| 1129 |
-
# It does NOT send original frame.
|
| 1130 |
-
# We should change worker to send original? Or assume GPT runs on processed?
|
| 1131 |
-
# If processed is heatmap, it's bad.
|
| 1132 |
-
# But User Objective says "legacy depth estimation" is optional/deprecated.
|
| 1133 |
-
# If depth_estimator_name is None, processed is just frame.
|
| 1134 |
-
|
| 1135 |
-
gpt_res = estimate_distance_gpt(tmp.name, dets)
|
| 1136 |
os.remove(tmp.name)
|
| 1137 |
-
|
| 1138 |
-
# Merge
|
| 1139 |
-
|
| 1140 |
-
|
| 1141 |
-
|
| 1142 |
-
# We assign temporary IDs T01.. based on index for GPT matching?
|
| 1143 |
-
# gpt_distance.py generates IDs if not present.
|
| 1144 |
-
# Let's inspect gpt_distance.py... assume it matches by index T01, T02...
|
| 1145 |
-
|
| 1146 |
-
for i, d in enumerate(dets):
|
| 1147 |
-
oid = f"T{str(i+1).zfill(2)}"
|
| 1148 |
-
if oid in gpt_res:
|
| 1149 |
d.update(gpt_res[oid])
|
| 1150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1151 |
except Exception as e:
|
| 1152 |
logging.error("GPT failed for Frame 0: %s", e)
|
| 1153 |
-
|
| 1154 |
-
# --- SEQUENTIAL TRACKING ---
|
| 1155 |
-
# Update tracker with current frame detections
|
| 1156 |
-
# ByteTracker returns the list of ACTIVE tracks with IDs
|
| 1157 |
-
dets = tracker.update(dets)
|
| 1158 |
-
speed_est.estimate(dets)
|
| 1159 |
|
| 1160 |
# --- RENDER BOXES & OVERLAYS ---
|
| 1161 |
# We need to convert list of dicts back to boxes array for draw_boxes
|
|
|
|
| 773 |
# GPT returns { "T01": { "distance_m": ..., "direction": ... } }
|
| 774 |
# Detections are list of dicts. We assume T01 maps to index 0, T02 to index 1...
|
| 775 |
for i, det in enumerate(detections):
|
| 776 |
+
# Index-based IDs are intentional here: no tracker runs for first-frame
|
| 777 |
+
# preview, so GPT, inference merge, and frontend all use the same
|
| 778 |
+
# index-based scheme (T01=index 0, T02=index 1, ...), keeping it
|
| 779 |
+
# self-consistent. The video pipeline uses real ByteTracker IDs instead.
|
| 780 |
obj_id = f"T{str(i+1).zfill(2)}"
|
| 781 |
if obj_id in gpt_results:
|
| 782 |
info = gpt_results[obj_id]
|
|
|
|
| 1109 |
|
| 1110 |
# Write next_idx
|
| 1111 |
p_frame, dets = buffer.pop(next_idx)
|
| 1112 |
+
|
| 1113 |
+
# --- SEQUENTIAL TRACKING ---
|
| 1114 |
+
# Run tracker FIRST so detections get real track_id from ByteTracker
|
| 1115 |
+
dets = tracker.update(dets)
|
| 1116 |
+
speed_est.estimate(dets)
|
| 1117 |
+
|
| 1118 |
# --- GPT ESTIMATION (Frame 0 Only) ---
|
| 1119 |
if next_idx == 0 and enable_gpt and dets:
|
| 1120 |
try:
|
| 1121 |
logging.info("Running GPT estimation for video start (Frame 0)...")
|
| 1122 |
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
|
| 1123 |
+
cv2.imwrite(tmp.name, p_frame)
|
| 1124 |
+
gpt_res = estimate_threat_gpt(tmp.name, dets)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1125 |
os.remove(tmp.name)
|
| 1126 |
+
|
| 1127 |
+
# Merge using real track_id assigned by ByteTracker
|
| 1128 |
+
for d in dets:
|
| 1129 |
+
oid = d.get('track_id')
|
| 1130 |
+
if oid and oid in gpt_res:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1131 |
d.update(gpt_res[oid])
|
| 1132 |
+
|
| 1133 |
+
# Push GPT data back into tracker's internal STrack objects
|
| 1134 |
+
# so it persists across subsequent frames via _sync_data
|
| 1135 |
+
tracker.inject_metadata(dets)
|
| 1136 |
+
|
| 1137 |
except Exception as e:
|
| 1138 |
logging.error("GPT failed for Frame 0: %s", e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1139 |
|
| 1140 |
# --- RENDER BOXES & OVERLAYS ---
|
| 1141 |
# We need to convert list of dicts back to boxes array for draw_boxes
|
utils/gpt_reasoning.py
CHANGED
|
@@ -36,7 +36,7 @@ def estimate_threat_gpt(
|
|
| 36 |
det_summary = []
|
| 37 |
for i, det in enumerate(detections):
|
| 38 |
# UI uses T01, T02... logic usually matches index + 1
|
| 39 |
-
obj_id = f"T{str(i+1).zfill(2)}"
|
| 40 |
bbox = det.get("bbox", [])
|
| 41 |
label = det.get("label", "object")
|
| 42 |
det_summary.append(f"- ID: {obj_id}, Classification Hint: {label}, BBox: {bbox}")
|
|
|
|
| 36 |
det_summary = []
|
| 37 |
for i, det in enumerate(detections):
|
| 38 |
# UI uses T01, T02... logic usually matches index + 1
|
| 39 |
+
obj_id = det.get("track_id") or det.get("id") or f"T{str(i+1).zfill(2)}"
|
| 40 |
bbox = det.get("bbox", [])
|
| 41 |
label = det.get("label", "object")
|
| 42 |
det_summary.append(f"- ID: {obj_id}, Classification Hint: {label}, BBox: {bbox}")
|
utils/tracker.py
CHANGED
|
@@ -195,6 +195,21 @@ class KalmanFilter:
|
|
| 195 |
return ret
|
| 196 |
|
| 197 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
class STrack:
|
| 199 |
"""
|
| 200 |
Single object track. Wrapper around KalmanFilter state.
|
|
@@ -329,9 +344,15 @@ class STrack:
|
|
| 329 |
STrack._count += 1
|
| 330 |
return STrack._count
|
| 331 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
|
| 333 |
class ByteTracker:
|
| 334 |
def __init__(self, track_thresh=0.5, track_buffer=30, match_thresh=0.8, frame_rate=30):
|
|
|
|
| 335 |
self.track_thresh = track_thresh
|
| 336 |
self.track_buffer = track_buffer
|
| 337 |
self.match_thresh = match_thresh
|
|
@@ -520,12 +541,32 @@ class ByteTracker:
|
|
| 520 |
"""Propagate attributes like GPT data between track and detection."""
|
| 521 |
# 1. From Source to Track (Update)
|
| 522 |
source_data = det_source.original_data if hasattr(det_source, 'original_data') else {}
|
| 523 |
-
for k in
|
| 524 |
if k in source_data:
|
| 525 |
track.gpt_data[k] = source_data[k]
|
| 526 |
-
|
| 527 |
# 2. From Track to Source (Forward fill logic handled in output construction)
|
| 528 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
|
| 530 |
# --- Helper Functions ---
|
| 531 |
|
|
|
|
| 195 |
return ret
|
| 196 |
|
| 197 |
|
| 198 |
+
GPT_SYNC_KEYS = frozenset({
|
| 199 |
+
# Legacy fields
|
| 200 |
+
"gpt_distance_m", "gpt_direction", "gpt_description", "gpt_raw",
|
| 201 |
+
# Threat intelligence
|
| 202 |
+
"threat_level_score", "threat_classification", "weapon_readiness",
|
| 203 |
+
# Naval assessment fields from schema
|
| 204 |
+
"vessel_category", "specific_class", "identity_markers", "flag_state",
|
| 205 |
+
"visible_weapons", "sensor_profile", "motion_status", "wake_description",
|
| 206 |
+
"aspect", "range_estimation_nm", "bearing_clock", "deck_activity",
|
| 207 |
+
"special_features", "tactical_intent",
|
| 208 |
+
# Computed fields
|
| 209 |
+
"distance_m", "direction", "description",
|
| 210 |
+
})
|
| 211 |
+
|
| 212 |
+
|
| 213 |
class STrack:
|
| 214 |
"""
|
| 215 |
Single object track. Wrapper around KalmanFilter state.
|
|
|
|
| 344 |
STrack._count += 1
|
| 345 |
return STrack._count
|
| 346 |
|
| 347 |
+
@staticmethod
|
| 348 |
+
def reset_count():
|
| 349 |
+
"""Reset track ID counter so the next video starts at T01."""
|
| 350 |
+
STrack._count = 0
|
| 351 |
+
|
| 352 |
|
| 353 |
class ByteTracker:
|
| 354 |
def __init__(self, track_thresh=0.5, track_buffer=30, match_thresh=0.8, frame_rate=30):
|
| 355 |
+
STrack.reset_count()
|
| 356 |
self.track_thresh = track_thresh
|
| 357 |
self.track_buffer = track_buffer
|
| 358 |
self.match_thresh = match_thresh
|
|
|
|
| 541 |
"""Propagate attributes like GPT data between track and detection."""
|
| 542 |
# 1. From Source to Track (Update)
|
| 543 |
source_data = det_source.original_data if hasattr(det_source, 'original_data') else {}
|
| 544 |
+
for k in GPT_SYNC_KEYS:
|
| 545 |
if k in source_data:
|
| 546 |
track.gpt_data[k] = source_data[k]
|
| 547 |
+
|
| 548 |
# 2. From Track to Source (Forward fill logic handled in output construction)
|
| 549 |
|
| 550 |
+
def inject_metadata(self, tracked_dets):
|
| 551 |
+
"""Push metadata from post-processed detection dicts back into internal STrack objects.
|
| 552 |
+
|
| 553 |
+
Needed because GPT results are added to detection dicts *after* tracker.update()
|
| 554 |
+
returns, so the tracker's internal state doesn't have GPT data unless we
|
| 555 |
+
explicitly push it back in.
|
| 556 |
+
"""
|
| 557 |
+
meta_by_tid = {}
|
| 558 |
+
for d in tracked_dets:
|
| 559 |
+
tid = d.get('track_id')
|
| 560 |
+
if not tid:
|
| 561 |
+
continue
|
| 562 |
+
meta = {k: d[k] for k in GPT_SYNC_KEYS if k in d}
|
| 563 |
+
if meta:
|
| 564 |
+
meta_by_tid[tid] = meta
|
| 565 |
+
for track in self.tracked_stracks:
|
| 566 |
+
tid_str = f"T{str(track.track_id).zfill(2)}"
|
| 567 |
+
if tid_str in meta_by_tid:
|
| 568 |
+
track.gpt_data.update(meta_by_tid[tid_str])
|
| 569 |
+
|
| 570 |
|
| 571 |
# --- Helper Functions ---
|
| 572 |
|