Zhen Ye Claude Opus 4.6 committed on
Commit
0ace9ca
·
1 Parent(s): f09ca9c

fix: keep track positions fresh during pause for always-clickable bboxes

Browse files

Decouple track prediction and backend sync from the play/pause guard so
hit-test data stays valid regardless of playback state. Add 1.5% hit
margin around bboxes to account for visual-vs-tracked position drift.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (2) hide show
  1. frontend/js/main.js +42 -47
  2. frontend/js/ui/overlays.js +4 -1
frontend/js/main.js CHANGED
@@ -688,27 +688,23 @@ document.addEventListener("DOMContentLoaded", () => {
688
  const dt = Math.min((t - state.tracker.lastFrameTime) / 1000, 0.1);
689
  state.tracker.lastFrameTime = t;
690
 
691
- // Update tracker when engaged
692
- if (state.tracker.running && videoEngage && !videoEngage.paused) {
693
- state.tracker.frameCount++;
694
-
695
  if (APP.core.demo.active && APP.core.demo.data) {
696
- // DEMO MODE (keep existing demo track logic unchanged)
697
  const demoTracks = getDemoFrameData(videoEngage.currentTime);
698
  if (demoTracks) {
699
- // Deep clone to avoid mutating source data
700
  const tracksClone = JSON.parse(JSON.stringify(demoTracks));
701
 
702
  state.tracker.tracks = tracksClone.map(d => ({
703
  ...d,
704
- // Ensure defaults
705
  lastSeen: t,
706
  state: "TRACK",
707
  depth_valid: true,
708
  depth_est_m: d.gpt_distance_m || 1000,
709
  }));
710
 
711
- // Normalize if needed (frontend usually expects 0..1)
712
  const w = videoEngage.videoWidth || state.frame.w || 1280;
713
  const h = videoEngage.videoHeight || state.frame.h || 720;
714
 
@@ -722,57 +718,56 @@ document.addEventListener("DOMContentLoaded", () => {
722
  });
723
  }
724
  } else {
725
- // ── NORMAL MODE ──
726
-
727
- // (1) Every frame: smooth overlay animation
728
  predictTracks(dt);
729
 
730
- // (2) Every ~10 frames (333ms): backend sync for accurate positions
731
  const jobId = state.hf.asyncJobId || state.hf.completedJobId;
732
  if (jobId && (t - state.tracker.lastHFSync > 333)) {
733
  const frameIdx = Math.floor(videoEngage.currentTime * 30);
734
  APP.core.tracker.syncWithBackend(frameIdx);
735
  state.tracker.lastHFSync = t;
736
  }
 
 
737
 
738
- // (3) Every 40 frames OR new object: render cards + fire GPT
739
- const framesSinceRender = state.tracker.frameCount - state.tracker._lastCardRenderFrame;
740
- if (state.tracker._newObjectDetected || framesSinceRender >= 40) {
741
- // Immediate card render (current positions/labels, pre-GPT)
742
- renderFrameTrackList();
743
- state.tracker._lastCardRenderFrame = state.tracker.frameCount;
744
- state.tracker._newObjectDetected = false;
745
-
746
- // Fire async GPT analysis (non-blocking)
747
- if (!state.tracker._gptBusy && state.tracker.tracks.length > 0) {
748
- state.tracker._gptBusy = true;
749
- APP.api.client.analyzeFrame(videoEngage, state.tracker.tracks)
750
- .then(enriched => {
751
- // Merge GPT results into state.detections
752
- for (const rd of enriched) {
753
- const tid = rd.track_id || rd.id;
754
- const existing = (state.detections || []).find(d => d.id === tid);
755
- if (existing && rd.gpt_raw) {
756
- existing.gpt_raw = rd.gpt_raw;
757
- existing.features = APP.core.gptMapping.buildFeatures(rd.gpt_raw);
758
- existing.assessment_status = rd.assessment_status || "ASSESSED";
759
- existing.threat_level_score = rd.threat_level_score || 0;
760
- existing.gpt_description = rd.gpt_description || existing.gpt_description;
761
- existing.gpt_distance_m = rd.gpt_distance_m || existing.gpt_distance_m;
762
- existing.gpt_direction = rd.gpt_direction || existing.gpt_direction;
763
- }
764
  }
765
- renderFrameTrackList(); // Re-render with GPT data
766
- state.tracker._gptBusy = false;
767
- })
768
- .catch(err => {
769
- console.warn("Frame GPT analysis failed:", err);
770
- state.tracker._gptBusy = false;
771
- });
772
- }
773
  }
774
  }
775
- } // End if(running)
776
 
777
  // Render UI
778
  if (renderFrameOverlay) renderFrameOverlay();
 
688
  const dt = Math.min((t - state.tracker.lastFrameTime) / 1000, 0.1);
689
  state.tracker.lastFrameTime = t;
690
 
691
+ // ── Always keep track positions fresh (playing OR paused) ──
692
+ // This ensures bboxes remain clickable regardless of playback state.
693
+ if (state.tracker.running && videoEngage && state.tracker.tracks.length > 0) {
 
694
  if (APP.core.demo.active && APP.core.demo.data) {
695
+ // DEMO MODE: sync tracks to current video time (even when paused)
696
  const demoTracks = getDemoFrameData(videoEngage.currentTime);
697
  if (demoTracks) {
 
698
  const tracksClone = JSON.parse(JSON.stringify(demoTracks));
699
 
700
  state.tracker.tracks = tracksClone.map(d => ({
701
  ...d,
 
702
  lastSeen: t,
703
  state: "TRACK",
704
  depth_valid: true,
705
  depth_est_m: d.gpt_distance_m || 1000,
706
  }));
707
 
 
708
  const w = videoEngage.videoWidth || state.frame.w || 1280;
709
  const h = videoEngage.videoHeight || state.frame.h || 720;
710
 
 
718
  });
719
  }
720
  } else {
721
+ // NORMAL MODE: predict positions every frame
 
 
722
  predictTracks(dt);
723
 
724
+ // Backend sync every 333ms (works while paused too)
725
  const jobId = state.hf.asyncJobId || state.hf.completedJobId;
726
  if (jobId && (t - state.tracker.lastHFSync > 333)) {
727
  const frameIdx = Math.floor(videoEngage.currentTime * 30);
728
  APP.core.tracker.syncWithBackend(frameIdx);
729
  state.tracker.lastHFSync = t;
730
  }
731
+ }
732
+ }
733
 
734
+ // ── Card rendering & GPT analysis: only during active playback ──
735
+ if (state.tracker.running && videoEngage && !videoEngage.paused) {
736
+ state.tracker.frameCount++;
737
+
738
+ const framesSinceRender = state.tracker.frameCount - state.tracker._lastCardRenderFrame;
739
+ if (state.tracker._newObjectDetected || framesSinceRender >= 40) {
740
+ renderFrameTrackList();
741
+ state.tracker._lastCardRenderFrame = state.tracker.frameCount;
742
+ state.tracker._newObjectDetected = false;
743
+
744
+ if (!state.tracker._gptBusy && state.tracker.tracks.length > 0) {
745
+ state.tracker._gptBusy = true;
746
+ APP.api.client.analyzeFrame(videoEngage, state.tracker.tracks)
747
+ .then(enriched => {
748
+ for (const rd of enriched) {
749
+ const tid = rd.track_id || rd.id;
750
+ const existing = (state.detections || []).find(d => d.id === tid);
751
+ if (existing && rd.gpt_raw) {
752
+ existing.gpt_raw = rd.gpt_raw;
753
+ existing.features = APP.core.gptMapping.buildFeatures(rd.gpt_raw);
754
+ existing.assessment_status = rd.assessment_status || "ASSESSED";
755
+ existing.threat_level_score = rd.threat_level_score || 0;
756
+ existing.gpt_description = rd.gpt_description || existing.gpt_description;
757
+ existing.gpt_distance_m = rd.gpt_distance_m || existing.gpt_distance_m;
758
+ existing.gpt_direction = rd.gpt_direction || existing.gpt_direction;
 
759
  }
760
+ }
761
+ renderFrameTrackList();
762
+ state.tracker._gptBusy = false;
763
+ })
764
+ .catch(err => {
765
+ console.warn("Frame GPT analysis failed:", err);
766
+ state.tracker._gptBusy = false;
767
+ });
768
  }
769
  }
770
+ }
771
 
772
  // Render UI
773
  if (renderFrameOverlay) renderFrameOverlay();
frontend/js/ui/overlays.js CHANGED
@@ -84,13 +84,16 @@ APP.ui.overlays.initClickHandler = function () {
84
  const ny = (e.clientY - rect.top) / rect.height;
85
 
86
  // Hit-test against track bboxes (smallest area wins for overlaps)
 
 
87
  let best = null;
88
  let bestArea = Infinity;
89
 
90
  for (const t of tracks) {
91
  const b = t.bbox;
92
  if (!b) continue;
93
- if (nx >= b.x && nx <= b.x + b.w && ny >= b.y && ny <= b.y + b.h) {
 
94
  const area = b.w * b.h;
95
  if (area < bestArea) {
96
  bestArea = area;
 
84
  const ny = (e.clientY - rect.top) / rect.height;
85
 
86
  // Hit-test against track bboxes (smallest area wins for overlaps)
87
+ // Margin accounts for drift between visual bbox and tracked position
88
+ const margin = 0.015;
89
  let best = null;
90
  let bestArea = Infinity;
91
 
92
  for (const t of tracks) {
93
  const b = t.bbox;
94
  if (!b) continue;
95
+ if (nx >= b.x - margin && nx <= b.x + b.w + margin &&
96
+ ny >= b.y - margin && ny <= b.y + b.h + margin) {
97
  const area = b.w * b.h;
98
  if (area < bestArea) {
99
  bestArea = area;