Zhen Ye committed on
Commit
e16d22f
·
1 Parent(s): 6f9d10a

feat: Implement Tab 2 GPT-based ranging and remove legacy depth calc

Browse files
Files changed (2) hide show
  1. LaserPerception/LaserPerception.js +92 -18
  2. app.py +38 -0
LaserPerception/LaserPerception.js CHANGED
@@ -92,6 +92,9 @@
92
  }
93
  };
94
 
 
 
 
95
  // ========= Elements =========
96
  const sysDot = $("#sys-dot");
97
  const sysStatus = $("#sys-status");
@@ -2541,6 +2544,7 @@
2541
  state.tracker.beamOn = true;
2542
  state.tracker.lastDetTime = 0;
2543
  state.tracker.lastFrameTime = now();
 
2544
  engageNote.textContent = "Running";
2545
  chipBeam.textContent = "BEAM:ON";
2546
  log("Engage started: tracking enabled, dwell accumulation active.", "g");
@@ -2868,17 +2872,78 @@
2868
  }
2869
 
2870
  function rangeFromArea(track) {
2871
- const w = videoEngage.videoWidth || state.frame.w;
2872
- const h = videoEngage.videoHeight || state.frame.h;
2873
- const a = (track.bbox.w * track.bbox.h) / (w * h);
2874
- const baseA = Math.max(1e-6, track.baseAreaFrac || a);
2875
- const rel = Math.sqrt(baseA / Math.max(1e-6, a));
2876
- return clamp(track.baseRange_m * rel, 80, 16000);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2877
  }
2878
 
2879
  function getTrackDisplayRange(track) {
2880
- const areaRange = rangeFromArea(track);
2881
- return getDisplayRange(track, areaRange);
 
 
 
 
 
 
 
 
2882
  }
2883
 
2884
  function dwellFromRange(track, range_m) {
@@ -2896,7 +2961,7 @@
2896
  let best = null;
2897
  state.tracker.tracks.forEach(tr => {
2898
  if (tr.killed) return;
2899
- const range = rangeFromArea(tr);
2900
  const mp = maxPowerAtTarget(range);
2901
  const margin = mp.Ptar - (tr.reqP_kW || 0);
2902
  const dwell = dwellFromRange(tr, range);
@@ -2920,7 +2985,10 @@
2920
  const tr = state.tracker.tracks.find(t => t.id === targetId);
2921
  if (!tr || tr.killed) return;
2922
 
2923
- const range = rangeFromArea(tr);
 
 
 
2924
  const reqD = dwellFromRange(tr, range);
2925
 
2926
  // state machine: TRACK -> SETTLE -> FIRE -> ASSESS -> KILL
@@ -3008,8 +3076,15 @@
3008
 
3009
  chipTracks.textContent = `TRACKS:${state.tracker.tracks.filter(t => !t.killed).length}`;
3010
  liveStamp.textContent = new Date().toLocaleTimeString();
 
 
 
 
 
 
3011
  }
3012
  tick();
 
3013
  }
3014
 
3015
  function renderEngageOverlay() {
@@ -3036,8 +3111,8 @@
3036
  const ax = b.x + b.w * tr.aimRel.relx;
3037
  const ay = b.y + b.h * tr.aimRel.rely;
3038
 
3039
- const range = rangeFromArea(tr);
3040
  const displayRange = getTrackDisplayRange(tr);
 
3041
  const reqD = dwellFromRange(tr, range);
3042
 
3043
  const mp = maxPowerAtTarget(range);
@@ -3109,8 +3184,8 @@
3109
  }
3110
 
3111
  alive.forEach(tr => {
3112
- const range = rangeFromArea(tr);
3113
  const displayRange = getTrackDisplayRange(tr);
 
3114
  const rangeTxt = Number.isFinite(displayRange.range)
3115
  ? `${Math.round(displayRange.range)}m (${displayRange.source})`
3116
  : "—";
@@ -3346,7 +3421,7 @@
3346
  // Start loop immediately
3347
  requestAnimationFrame(renderFrameRadar);
3348
 
3349
- // ========= Radar rendering (Tab 2) - Aligned with Tab 1 Scale/FOV =========
3350
  function renderRadar() {
3351
  const ctx = radarCanvas.getContext("2d");
3352
  const rect = radarCanvas.getBoundingClientRect();
@@ -3384,7 +3459,7 @@
3384
  // Sweep Animation
3385
  const t = now() / 1500; // Match Tab 1 speed (slower)
3386
  const ang = (t * (Math.PI * 2)) % (Math.PI * 2);
3387
-
3388
  // Gradient Sweep
3389
  const grad = ctx.createConicGradient(ang + Math.PI / 2, cx, cy);
3390
  grad.addColorStop(0, "transparent");
@@ -3419,12 +3494,11 @@
3419
  const tracks = state.tracker.tracks;
3420
  tracks.forEach(tr => {
3421
  // Range Logic (Matches Tab 1)
3422
- const areaRange = rangeFromArea(tr);
3423
  const displayRange = getTrackDisplayRange(tr);
3424
-
3425
  let dist = 3000;
3426
  if (Number.isFinite(displayRange.range)) dist = displayRange.range;
3427
- else dist = areaRange; // fallback
3428
 
3429
  // Scale: 0 -> 1500m (Matches Tab 1)
3430
  const maxRangeM = 1500;
@@ -3436,7 +3510,7 @@
3436
  const vw = videoEngage.videoWidth || state.frame.w || 1280;
3437
  const bx = tr.bbox.x + tr.bbox.w * 0.5;
3438
  const tx = (bx / vw) - 0.5; // -0.5 (left) to 0.5 (right)
3439
-
3440
  const fovRad = (60 * Math.PI) / 180;
3441
  const angle = (-Math.PI / 2) + (tx * fovRad);
3442
 
 
92
  }
93
  };
94
 
95
+ // Config: Update track reasoning every 30 frames
96
+ const REASON_INTERVAL = 30;
97
+
98
  // ========= Elements =========
99
  const sysDot = $("#sys-dot");
100
  const sysStatus = $("#sys-status");
 
2544
  state.tracker.beamOn = true;
2545
  state.tracker.lastDetTime = 0;
2546
  state.tracker.lastFrameTime = now();
2547
+ state.tracker.frameCount = 0;
2548
  engageNote.textContent = "Running";
2549
  chipBeam.textContent = "BEAM:ON";
2550
  log("Engage started: tracking enabled, dwell accumulation active.", "g");
 
2872
  }
2873
 
2874
  function rangeFromArea(track) {
2875
+ // [DELETED] "calculated" depth removed per user request.
2876
+ // Fallback only if GPT hasn't returned yet.
2877
+ return 1000;
2878
+ }
2879
+
2880
+ async function updateTracksWithGPT() {
2881
+ const activeTracks = state.tracker.tracks.filter(t => !t.killed);
2882
+ if (!activeTracks.length) return;
2883
+
2884
+ // Take a snapshot of the current video frame
2885
+ const c = document.createElement("canvas");
2886
+ c.width = videoEngage.videoWidth || state.frame.w;
2887
+ c.height = videoEngage.videoHeight || state.frame.h;
2888
+ const ctx = c.getContext("2d");
2889
+ ctx.drawImage(videoEngage, 0, 0, c.width, c.height);
2890
+
2891
+ const blob = await new Promise(r => c.toBlob(r, 'image/jpeg', 0.85));
2892
+
2893
+ // Prepare tracks payload
2894
+ // Backend expects: [{"id":..., "bbox":[x,y,w,h], "label":...}]
2895
+ const tracksPayload = activeTracks.map(t => ({
2896
+ id: t.id,
2897
+ bbox: [Math.round(t.bbox.x), Math.round(t.bbox.y), Math.round(t.bbox.w), Math.round(t.bbox.h)],
2898
+ label: t.label
2899
+ }));
2900
+
2901
+ const fd = new FormData();
2902
+ fd.append("frame", blob, "scan.jpg");
2903
+ fd.append("tracks", JSON.stringify(tracksPayload));
2904
+
2905
+ log(`Requesting GPT reasoning for ${activeTracks.length} tracks...`, "t");
2906
+
2907
+ try {
2908
+ const res = await fetch(`${state.hf.baseUrl}/reason/track`, {
2909
+ method: "POST",
2910
+ body: fd
2911
+ });
2912
+ if (res.ok) {
2913
+ const data = await res.json(); // { "T01": { "distance_m": 450, "description": "..." }, ... }
2914
+ let updatedCount = 0;
2915
+
2916
+ // Merge into state
2917
+ Object.keys(data).forEach(tid => {
2918
+ const info = data[tid];
2919
+ const track = state.tracker.tracks.find(t => t.id === tid);
2920
+ if (track) {
2921
+ if (info.distance_m) track.gpt_distance_m = info.distance_m;
2922
+ if (info.description) track.gpt_description = info.description;
2923
+ updatedCount++;
2924
+ }
2925
+ });
2926
+ log(`GPT updated ${updatedCount} tracks.`, "g");
2927
+ renderTrackCards(); // Force refresh UI
2928
+ } else {
2929
+ console.warn("GPT reason failed", res.status);
2930
+ }
2931
+ } catch (e) {
2932
+ console.error("GPT reason error", e);
2933
+ }
2934
  }
2935
 
2936
  function getTrackDisplayRange(track) {
2937
+ // Priority: GPT > DepthModel (if enabled) > fallback
2938
+ if (track.gpt_distance_m) {
2939
+ return { range: track.gpt_distance_m, source: "GPT" };
2940
+ }
2941
+ // User requested removing "calculated" depth.
2942
+ // We still support depth model if valid (optional).
2943
+ if (hasValidDepth(track)) {
2944
+ return { range: track.depth_est_m, source: "Lidar" };
2945
+ }
2946
+ return { range: null, source: "Wait..." };
2947
  }
2948
 
2949
  function dwellFromRange(track, range_m) {
 
2961
  let best = null;
2962
  state.tracker.tracks.forEach(tr => {
2963
  if (tr.killed) return;
2964
+ const range = getTrackDisplayRange(tr).range || 1000;
2965
  const mp = maxPowerAtTarget(range);
2966
  const margin = mp.Ptar - (tr.reqP_kW || 0);
2967
  const dwell = dwellFromRange(tr, range);
 
2985
  const tr = state.tracker.tracks.find(t => t.id === targetId);
2986
  if (!tr || tr.killed) return;
2987
 
2988
+ if (!tr || tr.killed) return;
2989
+
2990
+ const disp = getTrackDisplayRange(tr);
2991
+ const range = disp.range || 1000;
2992
  const reqD = dwellFromRange(tr, range);
2993
 
2994
  // state machine: TRACK -> SETTLE -> FIRE -> ASSESS -> KILL
 
3076
 
3077
  chipTracks.textContent = `TRACKS:${state.tracker.tracks.filter(t => !t.killed).length}`;
3078
  liveStamp.textContent = new Date().toLocaleTimeString();
3079
+
3080
+ // GPT Update Loop
3081
+ state.tracker.frameCount++;
3082
+ if (state.tracker.frameCount % REASON_INTERVAL === 0) {
3083
+ updateTracksWithGPT().catch(e => console.error(e));
3084
+ }
3085
  }
3086
  tick();
3087
+
3088
  }
3089
 
3090
  function renderEngageOverlay() {
 
3111
  const ax = b.x + b.w * tr.aimRel.relx;
3112
  const ay = b.y + b.h * tr.aimRel.rely;
3113
 
 
3114
  const displayRange = getTrackDisplayRange(tr);
3115
+ const range = displayRange.range || 1000;
3116
  const reqD = dwellFromRange(tr, range);
3117
 
3118
  const mp = maxPowerAtTarget(range);
 
3184
  }
3185
 
3186
  alive.forEach(tr => {
 
3187
  const displayRange = getTrackDisplayRange(tr);
3188
+ const range = displayRange.range || 1000;
3189
  const rangeTxt = Number.isFinite(displayRange.range)
3190
  ? `${Math.round(displayRange.range)}m (${displayRange.source})`
3191
  : "—";
 
3421
  // Start loop immediately
3422
  requestAnimationFrame(renderFrameRadar);
3423
 
3424
+ // ========= Radar rendering (Tab 2) - Aligned with Tab 1 Scale/FOV =========
3425
  function renderRadar() {
3426
  const ctx = radarCanvas.getContext("2d");
3427
  const rect = radarCanvas.getBoundingClientRect();
 
3459
  // Sweep Animation
3460
  const t = now() / 1500; // Match Tab 1 speed (slower)
3461
  const ang = (t * (Math.PI * 2)) % (Math.PI * 2);
3462
+
3463
  // Gradient Sweep
3464
  const grad = ctx.createConicGradient(ang + Math.PI / 2, cx, cy);
3465
  grad.addColorStop(0, "transparent");
 
3494
  const tracks = state.tracker.tracks;
3495
  tracks.forEach(tr => {
3496
  // Range Logic (Matches Tab 1)
 
3497
  const displayRange = getTrackDisplayRange(tr);
3498
+
3499
  let dist = 3000;
3500
  if (Number.isFinite(displayRange.range)) dist = displayRange.range;
3501
+ // else remain at default "unknown" distance (far out) until GPT returns
3502
 
3503
  // Scale: 0 -> 1500m (Matches Tab 1)
3504
  const maxRangeM = 1500;
 
3510
  const vw = videoEngage.videoWidth || state.frame.w || 1280;
3511
  const bx = tr.bbox.x + tr.bbox.w * 0.5;
3512
  const tx = (bx / vw) - 0.5; // -0.5 (left) to 0.5 (right)
3513
+
3514
  const fovRad = (60 * Math.PI) / 180;
3515
  const angle = (-Math.PI / 2) + (tx * fovRad);
3516
 
app.py CHANGED
@@ -54,6 +54,7 @@ from jobs.storage import (
54
  get_job_storage,
55
  get_output_video_path,
56
  )
 
57
 
58
  logging.basicConfig(level=logging.INFO)
59
 
@@ -564,5 +565,42 @@ async def stream_video(job_id: str):
564
  )
565
 
566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  if __name__ == "__main__":
568
  uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)
 
54
  get_job_storage,
55
  get_output_video_path,
56
  )
57
+ from utils.gpt_distance import estimate_distance_gpt
58
 
59
  logging.basicConfig(level=logging.INFO)
60
 
 
565
  )
566
 
567
 
568
+ @app.post("/reason/track")
569
+ async def reason_track(
570
+ frame: UploadFile = File(...),
571
+ tracks: str = Form(...) # JSON string of tracks: [{"id": "T01", "bbox": [x,y,w,h], "label": "car"}, ...]
572
+ ):
573
+ """
574
+ Reason about specific tracks in a frame using GPT.
575
+ Returns distance and description for each object ID.
576
+ """
577
+ import json
578
+ try:
579
+ input_path = _save_upload_to_tmp(frame)
580
+ except Exception:
581
+ raise HTTPException(status_code=500, detail="Failed to save uploaded frame")
582
+
583
+ try:
584
+ track_list = json.loads(tracks)
585
+ except json.JSONDecodeError:
586
+ _safe_delete(input_path)
587
+ raise HTTPException(status_code=400, detail="Invalid tracks JSON")
588
+
589
+ # Run GPT estimation
590
+ # This is blocking, but that's expected for this endpoint structure.
591
+ # For high concurrency, might want to offload to threadpool or async wrapper.
592
+ try:
593
+ # estimate_distance_gpt reads the file from disk
594
+ results = await asyncio.to_thread(estimate_distance_gpt, input_path, track_list)
595
+ except Exception as e:
596
+ logging.exception("GPT reasoning failed")
597
+ _safe_delete(input_path)
598
+ raise HTTPException(status_code=500, detail=str(e))
599
+
600
+ _safe_delete(input_path)
601
+ return results
602
+
603
+
604
+
605
  if __name__ == "__main__":
606
  uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=False)