Spaces:
Running
on
A10G
Running
on
A10G
Zhen Ye
Claude Opus 4.6
committed on
Commit
·
6a99834
1
Parent(s):
2f284f5
fix: track cards update during playback + periodic GPT re-analysis
Browse files
Fix 4 interconnected bugs preventing track card updates during video
playback: normal tracking code trapped inside demo-mode guard,
renderFrameTrackList never called in animation loop, state.detections/
tracks disconnect, and asyncJobId nulled after completion blocking
syncWithBackend. Cards now update every 40 frames with GPT re-analysis
firing async on each cycle. New /detect/analyze-frame endpoint provides
per-frame GPT threat assessment.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- app.py +53 -0
- frontend/js/api/client.js +41 -0
- frontend/js/core/state.js +4 -1
- frontend/js/core/tracker.js +9 -6
- frontend/js/core/video.js +1 -0
- frontend/js/main.js +51 -22
app.py
CHANGED
|
@@ -599,6 +599,59 @@ async def get_frame_tracks(job_id: str, frame_idx: int):
|
|
| 599 |
return data or []
|
| 600 |
|
| 601 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 602 |
@app.delete("/detect/job/{job_id}")
|
| 603 |
async def cancel_job(job_id: str):
|
| 604 |
"""Cancel a running job."""
|
|
|
|
| 599 |
return data or []
|
| 600 |
|
| 601 |
|
| 602 |
+
@app.post("/detect/analyze-frame")
|
| 603 |
+
async def analyze_frame(
|
| 604 |
+
image: UploadFile = File(...),
|
| 605 |
+
detections: str = Form(...),
|
| 606 |
+
job_id: str = Form(None),
|
| 607 |
+
):
|
| 608 |
+
"""Run GPT threat assessment on a single video frame."""
|
| 609 |
+
import json as json_module
|
| 610 |
+
from utils.gpt_reasoning import encode_frame_to_b64
|
| 611 |
+
|
| 612 |
+
dets = json_module.loads(detections)
|
| 613 |
+
|
| 614 |
+
# Look up mission_spec from stored job (if available)
|
| 615 |
+
mission_spec = None
|
| 616 |
+
if job_id:
|
| 617 |
+
job = get_job_storage().get(job_id)
|
| 618 |
+
if job:
|
| 619 |
+
mission_spec = job.mission_spec
|
| 620 |
+
|
| 621 |
+
# Decode uploaded image
|
| 622 |
+
image_bytes = await image.read()
|
| 623 |
+
nparr = np.frombuffer(image_bytes, np.uint8)
|
| 624 |
+
frame = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
| 625 |
+
if frame is None:
|
| 626 |
+
raise HTTPException(status_code=400, detail="Invalid image")
|
| 627 |
+
|
| 628 |
+
# Run GPT in thread pool (blocking OpenAI API call)
|
| 629 |
+
frame_b64 = encode_frame_to_b64(frame)
|
| 630 |
+
async with _GPT_SEMAPHORE:
|
| 631 |
+
gpt_results = await asyncio.to_thread(
|
| 632 |
+
estimate_threat_gpt,
|
| 633 |
+
detections=dets,
|
| 634 |
+
mission_spec=mission_spec,
|
| 635 |
+
image_b64=frame_b64,
|
| 636 |
+
)
|
| 637 |
+
|
| 638 |
+
# Merge GPT results into detection records
|
| 639 |
+
for d in dets:
|
| 640 |
+
oid = d.get("track_id") or d.get("id")
|
| 641 |
+
if oid and oid in gpt_results:
|
| 642 |
+
payload = gpt_results[oid]
|
| 643 |
+
d["gpt_raw"] = payload
|
| 644 |
+
d["assessment_status"] = payload.get("assessment_status", "ASSESSED")
|
| 645 |
+
d["threat_level_score"] = payload.get("threat_level_score", 0)
|
| 646 |
+
d["threat_classification"] = payload.get("threat_classification", "Unknown")
|
| 647 |
+
d["weapon_readiness"] = payload.get("weapon_readiness", "Unknown")
|
| 648 |
+
d["gpt_description"] = payload.get("gpt_description")
|
| 649 |
+
d["gpt_distance_m"] = payload.get("gpt_distance_m")
|
| 650 |
+
d["gpt_direction"] = payload.get("gpt_direction")
|
| 651 |
+
|
| 652 |
+
return dets
|
| 653 |
+
|
| 654 |
+
|
| 655 |
@app.delete("/detect/job/{job_id}")
|
| 656 |
async def cancel_job(job_id: str):
|
| 657 |
"""Cancel a running job."""
|
frontend/js/api/client.js
CHANGED
|
@@ -198,6 +198,7 @@ APP.api.client.pollAsyncJob = async function () {
|
|
| 198 |
await fetchDepthFirstFrame();
|
| 199 |
|
| 200 |
clearInterval(state.hf.asyncPollInterval);
|
|
|
|
| 201 |
state.hf.asyncJobId = null;
|
| 202 |
setHfStatus("ready");
|
| 203 |
resolve();
|
|
@@ -306,6 +307,46 @@ APP.api.client.callHfObjectDetection = async function (canvas) {
|
|
| 306 |
return await resp.json();
|
| 307 |
};
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
// Chat about threats using GPT
|
| 310 |
APP.api.client.chatAboutThreats = async function (question, detections) {
|
| 311 |
const { state } = APP.core;
|
|
|
|
| 198 |
await fetchDepthFirstFrame();
|
| 199 |
|
| 200 |
clearInterval(state.hf.asyncPollInterval);
|
| 201 |
+
state.hf.completedJobId = state.hf.asyncJobId; // preserve for post-completion sync
|
| 202 |
state.hf.asyncJobId = null;
|
| 203 |
setHfStatus("ready");
|
| 204 |
resolve();
|
|
|
|
| 307 |
return await resp.json();
|
| 308 |
};
|
| 309 |
|
| 310 |
+
// Capture current video frame and send to backend for GPT analysis
|
| 311 |
+
APP.api.client.analyzeFrame = async function (videoEl, tracks) {
|
| 312 |
+
const { state } = APP.core;
|
| 313 |
+
const { canvasToBlob } = APP.core.utils;
|
| 314 |
+
|
| 315 |
+
// Capture current video frame
|
| 316 |
+
const canvas = document.createElement("canvas");
|
| 317 |
+
canvas.width = videoEl.videoWidth;
|
| 318 |
+
canvas.height = videoEl.videoHeight;
|
| 319 |
+
canvas.getContext("2d").drawImage(videoEl, 0, 0);
|
| 320 |
+
const blob = await canvasToBlob(canvas);
|
| 321 |
+
|
| 322 |
+
// Convert normalized bbox (0-1) back to pixel coords for backend
|
| 323 |
+
const w = canvas.width, h = canvas.height;
|
| 324 |
+
const dets = tracks.map(t => ({
|
| 325 |
+
track_id: t.id,
|
| 326 |
+
label: t.label,
|
| 327 |
+
bbox: [
|
| 328 |
+
Math.round(t.bbox.x * w),
|
| 329 |
+
Math.round(t.bbox.y * h),
|
| 330 |
+
Math.round((t.bbox.x + t.bbox.w) * w),
|
| 331 |
+
Math.round((t.bbox.y + t.bbox.h) * h),
|
| 332 |
+
],
|
| 333 |
+
score: t.score,
|
| 334 |
+
}));
|
| 335 |
+
|
| 336 |
+
const form = new FormData();
|
| 337 |
+
form.append("image", blob, "frame.jpg");
|
| 338 |
+
form.append("detections", JSON.stringify(dets));
|
| 339 |
+
const jobId = state.hf.asyncJobId || state.hf.completedJobId;
|
| 340 |
+
if (jobId) form.append("job_id", jobId);
|
| 341 |
+
|
| 342 |
+
const resp = await fetch(`${state.hf.baseUrl}/detect/analyze-frame`, {
|
| 343 |
+
method: "POST",
|
| 344 |
+
body: form,
|
| 345 |
+
});
|
| 346 |
+
if (!resp.ok) throw new Error(`Frame analysis failed: ${resp.statusText}`);
|
| 347 |
+
return await resp.json();
|
| 348 |
+
};
|
| 349 |
+
|
| 350 |
// Chat about threats using GPT
|
| 351 |
APP.api.client.chatAboutThreats = async function (question, detections) {
|
| 352 |
const { state } = APP.core;
|
frontend/js/core/state.js
CHANGED
|
@@ -14,6 +14,7 @@ APP.core.state = {
|
|
| 14 |
baseUrl: (window.API_CONFIG?.BACKEND_BASE || window.API_CONFIG?.BASE_URL || "").replace(/\/$/, "") || window.location.origin,
|
| 15 |
detector: "auto",
|
| 16 |
asyncJobId: null, // Current job ID from /detect/async
|
|
|
|
| 17 |
asyncPollInterval: null, // Polling timer handle
|
| 18 |
firstFrameUrl: null, // First frame preview URL
|
| 19 |
firstFrameDetections: null, // First-frame detections from backend
|
|
@@ -56,7 +57,9 @@ APP.core.state = {
|
|
| 56 |
selectedTrackId: null,
|
| 57 |
beamOn: false,
|
| 58 |
lastFrameTime: 0,
|
| 59 |
-
frameCount: 0
|
|
|
|
|
|
|
| 60 |
},
|
| 61 |
|
| 62 |
frame: {
|
|
|
|
| 14 |
baseUrl: (window.API_CONFIG?.BACKEND_BASE || window.API_CONFIG?.BASE_URL || "").replace(/\/$/, "") || window.location.origin,
|
| 15 |
detector: "auto",
|
| 16 |
asyncJobId: null, // Current job ID from /detect/async
|
| 17 |
+
completedJobId: null, // Preserved job ID for post-completion track sync
|
| 18 |
asyncPollInterval: null, // Polling timer handle
|
| 19 |
firstFrameUrl: null, // First frame preview URL
|
| 20 |
firstFrameDetections: null, // First-frame detections from backend
|
|
|
|
| 57 |
selectedTrackId: null,
|
| 58 |
beamOn: false,
|
| 59 |
lastFrameTime: 0,
|
| 60 |
+
frameCount: 0,
|
| 61 |
+
_lastCardRenderFrame: 0, // Frame count at last card render
|
| 62 |
+
_gptBusy: false // Prevent overlapping GPT calls
|
| 63 |
},
|
| 64 |
|
| 65 |
frame: {
|
frontend/js/core/tracker.js
CHANGED
|
@@ -156,7 +156,7 @@ APP.core.tracker.matchAndUpdateTracks = function (dets, dtSec) {
|
|
| 156 |
APP.core.tracker.syncWithBackend = async function (frameIdx) {
|
| 157 |
const { state } = APP.core;
|
| 158 |
const { $ } = APP.core.utils;
|
| 159 |
-
const jobId = state.hf.asyncJobId;
|
| 160 |
|
| 161 |
if (!jobId || !state.hf.baseUrl) return;
|
| 162 |
|
|
@@ -242,15 +242,18 @@ APP.core.tracker.syncWithBackend = async function (frameIdx) {
|
|
| 242 |
}
|
| 243 |
}
|
| 244 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
// Update state
|
| 246 |
state.tracker.tracks = newTracks;
|
| 247 |
state.detections = newTracks; // Keep synced
|
| 248 |
|
| 249 |
-
// Re-render track cards (same renderer as Tab 1)
|
| 250 |
-
if (APP.ui.cards && APP.ui.cards.renderFrameTrackList) {
|
| 251 |
-
APP.ui.cards.renderFrameTrackList();
|
| 252 |
-
}
|
| 253 |
-
|
| 254 |
} catch (e) {
|
| 255 |
console.warn("Track sync failed", e);
|
| 256 |
}
|
|
|
|
| 156 |
APP.core.tracker.syncWithBackend = async function (frameIdx) {
|
| 157 |
const { state } = APP.core;
|
| 158 |
const { $ } = APP.core.utils;
|
| 159 |
+
const jobId = state.hf.asyncJobId || state.hf.completedJobId;
|
| 160 |
|
| 161 |
if (!jobId || !state.hf.baseUrl) return;
|
| 162 |
|
|
|
|
| 242 |
}
|
| 243 |
}
|
| 244 |
|
| 245 |
+
// Detect new objects before state update
|
| 246 |
+
const oldIds = new Set(state.tracker.tracks.map(t => t.id));
|
| 247 |
+
const brandNew = newTracks.filter(t => !oldIds.has(t.id));
|
| 248 |
+
if (brandNew.length > 0) {
|
| 249 |
+
state.tracker._newObjectDetected = true;
|
| 250 |
+
APP.ui.logging.log(`New objects: ${brandNew.map(t => t.id).join(", ")}`, "t");
|
| 251 |
+
}
|
| 252 |
+
|
| 253 |
// Update state
|
| 254 |
state.tracker.tracks = newTracks;
|
| 255 |
state.detections = newTracks; // Keep synced
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
} catch (e) {
|
| 258 |
console.warn("Track sync failed", e);
|
| 259 |
}
|
frontend/js/core/video.js
CHANGED
|
@@ -118,6 +118,7 @@ APP.core.video.unloadVideo = async function (options = {}) {
|
|
| 118 |
state.hf.busy = false;
|
| 119 |
state.hf.lastError = null;
|
| 120 |
state.hf.asyncJobId = null;
|
|
|
|
| 121 |
state.hf.asyncStatus = "idle";
|
| 122 |
state.hf.firstFrameUrl = null;
|
| 123 |
state.hf.videoUrl = null;
|
|
|
|
| 118 |
state.hf.busy = false;
|
| 119 |
state.hf.lastError = null;
|
| 120 |
state.hf.asyncJobId = null;
|
| 121 |
+
state.hf.completedJobId = null;
|
| 122 |
state.hf.asyncStatus = "idle";
|
| 123 |
state.hf.firstFrameUrl = null;
|
| 124 |
state.hf.videoUrl = null;
|
frontend/js/main.js
CHANGED
|
@@ -620,6 +620,7 @@ document.addEventListener("DOMContentLoaded", () => {
|
|
| 620 |
state.isReasoning = false;
|
| 621 |
state.hf.busy = false;
|
| 622 |
state.hf.asyncJobId = null;
|
|
|
|
| 623 |
state.hf.asyncStatus = "cancelled";
|
| 624 |
|
| 625 |
// Re-enable Reason button
|
|
@@ -684,9 +685,10 @@ document.addEventListener("DOMContentLoaded", () => {
|
|
| 684 |
|
| 685 |
// Update tracker when engaged
|
| 686 |
if (state.tracker.running && videoEngage && !videoEngage.paused) {
|
|
|
|
| 687 |
|
| 688 |
-
// DEMO MODE BYPASS
|
| 689 |
if (APP.core.demo.active && APP.core.demo.data) {
|
|
|
|
| 690 |
const demoTracks = getDemoFrameData(videoEngage.currentTime);
|
| 691 |
if (demoTracks) {
|
| 692 |
// Deep clone to avoid mutating source data
|
|
@@ -706,35 +708,62 @@ document.addEventListener("DOMContentLoaded", () => {
|
|
| 706 |
const h = videoEngage.videoHeight || state.frame.h || 720;
|
| 707 |
|
| 708 |
state.tracker.tracks.forEach(tr => {
|
| 709 |
-
// Check if inputs are absolute pixels (if x > 1 or w > 1)
|
| 710 |
-
// We assume demo data is in pixels (as per spec)
|
| 711 |
if (tr.bbox.x > 1 || tr.bbox.w > 1) {
|
| 712 |
tr.bbox.x /= w;
|
| 713 |
tr.bbox.y /= h;
|
| 714 |
tr.bbox.w /= w;
|
| 715 |
tr.bbox.h /= h;
|
| 716 |
}
|
| 717 |
-
|
| 718 |
-
// Note: history in 'tr' is also in pixels in the source JSON.
|
| 719 |
-
// But we don't normalize history here because radar.js currently handles raw pixels for history?
|
| 720 |
-
// Actually, we should probably standardize everything to normalized if possible,
|
| 721 |
-
// but let's check radar.js first.
|
| 722 |
});
|
|
|
|
|
|
|
|
|
|
| 723 |
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
}
|
| 739 |
}
|
| 740 |
}
|
|
|
|
| 620 |
state.isReasoning = false;
|
| 621 |
state.hf.busy = false;
|
| 622 |
state.hf.asyncJobId = null;
|
| 623 |
+
state.hf.completedJobId = null;
|
| 624 |
state.hf.asyncStatus = "cancelled";
|
| 625 |
|
| 626 |
// Re-enable Reason button
|
|
|
|
| 685 |
|
| 686 |
// Update tracker when engaged
|
| 687 |
if (state.tracker.running && videoEngage && !videoEngage.paused) {
|
| 688 |
+
state.tracker.frameCount++;
|
| 689 |
|
|
|
|
| 690 |
if (APP.core.demo.active && APP.core.demo.data) {
|
| 691 |
+
// DEMO MODE (keep existing demo track logic unchanged)
|
| 692 |
const demoTracks = getDemoFrameData(videoEngage.currentTime);
|
| 693 |
if (demoTracks) {
|
| 694 |
// Deep clone to avoid mutating source data
|
|
|
|
| 708 |
const h = videoEngage.videoHeight || state.frame.h || 720;
|
| 709 |
|
| 710 |
state.tracker.tracks.forEach(tr => {
|
|
|
|
|
|
|
| 711 |
if (tr.bbox.x > 1 || tr.bbox.w > 1) {
|
| 712 |
tr.bbox.x /= w;
|
| 713 |
tr.bbox.y /= h;
|
| 714 |
tr.bbox.w /= w;
|
| 715 |
tr.bbox.h /= h;
|
| 716 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 717 |
});
|
| 718 |
+
}
|
| 719 |
+
} else {
|
| 720 |
+
// ── NORMAL MODE ──
|
| 721 |
|
| 722 |
+
// (1) Every frame: smooth overlay animation
|
| 723 |
+
predictTracks(dt);
|
| 724 |
+
|
| 725 |
+
// (2) Every ~10 frames (333ms): backend sync for accurate positions
|
| 726 |
+
const jobId = state.hf.asyncJobId || state.hf.completedJobId;
|
| 727 |
+
if (jobId && (t - state.tracker.lastHFSync > 333)) {
|
| 728 |
+
const frameIdx = Math.floor(videoEngage.currentTime * 30);
|
| 729 |
+
APP.core.tracker.syncWithBackend(frameIdx);
|
| 730 |
+
state.tracker.lastHFSync = t;
|
| 731 |
+
}
|
| 732 |
+
|
| 733 |
+
// (3) Every 40 frames OR new object: render cards + fire GPT
|
| 734 |
+
const framesSinceRender = state.tracker.frameCount - state.tracker._lastCardRenderFrame;
|
| 735 |
+
if (state.tracker._newObjectDetected || framesSinceRender >= 40) {
|
| 736 |
+
// Immediate card render (current positions/labels, pre-GPT)
|
| 737 |
+
renderFrameTrackList();
|
| 738 |
+
state.tracker._lastCardRenderFrame = state.tracker.frameCount;
|
| 739 |
+
state.tracker._newObjectDetected = false;
|
| 740 |
+
|
| 741 |
+
// Fire async GPT analysis (non-blocking)
|
| 742 |
+
if (!state.tracker._gptBusy && state.tracker.tracks.length > 0) {
|
| 743 |
+
state.tracker._gptBusy = true;
|
| 744 |
+
APP.api.client.analyzeFrame(videoEngage, state.tracker.tracks)
|
| 745 |
+
.then(enriched => {
|
| 746 |
+
// Merge GPT results into state.detections
|
| 747 |
+
for (const rd of enriched) {
|
| 748 |
+
const tid = rd.track_id || rd.id;
|
| 749 |
+
const existing = (state.detections || []).find(d => d.id === tid);
|
| 750 |
+
if (existing && rd.gpt_raw) {
|
| 751 |
+
existing.gpt_raw = rd.gpt_raw;
|
| 752 |
+
existing.features = APP.core.gptMapping.buildFeatures(rd.gpt_raw);
|
| 753 |
+
existing.assessment_status = rd.assessment_status || "ASSESSED";
|
| 754 |
+
existing.threat_level_score = rd.threat_level_score || 0;
|
| 755 |
+
existing.gpt_description = rd.gpt_description || existing.gpt_description;
|
| 756 |
+
existing.gpt_distance_m = rd.gpt_distance_m || existing.gpt_distance_m;
|
| 757 |
+
existing.gpt_direction = rd.gpt_direction || existing.gpt_direction;
|
| 758 |
+
}
|
| 759 |
+
}
|
| 760 |
+
renderFrameTrackList(); // Re-render with GPT data
|
| 761 |
+
state.tracker._gptBusy = false;
|
| 762 |
+
})
|
| 763 |
+
.catch(err => {
|
| 764 |
+
console.warn("Frame GPT analysis failed:", err);
|
| 765 |
+
state.tracker._gptBusy = false;
|
| 766 |
+
});
|
| 767 |
}
|
| 768 |
}
|
| 769 |
}
|