Zhen Ye committed on
Commit
64bbe44
·
1 Parent(s): 5c36daa

Refactor: Decouple depth estimator and suppress noisy logs

Browse files

- Backend: Remove heuristic depth calculation; depth model now only outputs relative depth for visualization.
- Frontend: Establish GPT as the sole authority for track distances.
- Config: Reduce httpx/huggingface logging noise to WARNING level.

Files changed (3) hide show
  1. LaserPerception/LaserPerception.js +44 -63
  2. app.py +6 -0
  3. inference.py +43 -71
LaserPerception/LaserPerception.js CHANGED
@@ -989,19 +989,18 @@
989
  bbox: { x: x1, y: y1, w: w, h: h },
990
  aim: { ...ap },
991
  features: null,
992
- baseRange_m: d.gpt_distance_m || d.depth_est_m || null, // Priority to GPT/Depth
993
  baseAreaFrac: null,
994
  baseDwell_s: null,
995
  reqP_kW: null,
996
  maxP_kW: null,
997
  pkill: null,
998
- // Pass through backend props
999
  gpt_distance_m: d.gpt_distance_m,
1000
  gpt_direction: d.gpt_direction,
1001
  gpt_description: d.gpt_description,
1002
- depth_est_m: d.depth_est_m,
1003
- depth_rel: d.depth_rel,
1004
- depth_valid: !!(d.depth_est_m || d.gpt_distance_m)
1005
  };
1006
  });
1007
  // Update UI components
@@ -1875,16 +1874,12 @@
1875
  const y1 = bbox[1] || 0;
1876
  const x2 = bbox[2] || 0;
1877
  const y2 = bbox[3] || 0;
1878
- const depthEst = Number.isFinite(d.depth_est_m) ? d.depth_est_m : null;
1879
  const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
1880
- const depthValid = d.depth_valid === true && depthEst !== null;
1881
  return {
1882
  bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
1883
  class: d.label || "drone",
1884
  score: d.score ?? 0,
1885
- depth_est_m: depthEst,
1886
- depth_rel: depthRel,
1887
- depth_valid: depthValid
1888
  };
1889
  });
1890
  }
@@ -1904,16 +1899,12 @@
1904
  const y1 = bbox[1] || 0;
1905
  const x2 = bbox[2] || 0;
1906
  const y2 = bbox[3] || 0;
1907
- const depthEst = Number.isFinite(d.depth_est_m) ? d.depth_est_m : null;
1908
  const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
1909
- const depthValid = d.depth_valid === true && depthEst !== null;
1910
  return {
1911
  bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
1912
  class: d.label || "object",
1913
  score: d.score ?? 0,
1914
- depth_est_m: depthEst,
1915
- depth_rel: depthRel,
1916
- depth_valid: depthValid
1917
  };
1918
  });
1919
  }
@@ -2124,9 +2115,8 @@
2124
  reqP_kW: null,
2125
  maxP_kW: null,
2126
  pkill: null,
2127
- depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
2128
- depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2129
- depth_valid: d.depth_valid === true
2130
  };
2131
  });
2132
 
@@ -2341,17 +2331,8 @@
2341
 
2342
  if (det.gpt_distance_m) {
2343
  rangeStr = `${det.gpt_distance_m}m (GPT)`;
2344
- } else if (det.depth_est_m) {
2345
- rangeStr = `${Math.round(det.depth_est_m)}m (Lidar)`;
2346
- } else {
2347
- // Fallback
2348
- if (det.box) {
2349
- const [x1, y1, x2, y2] = det.box;
2350
- const area = ((x2 - x1) * (y2 - y1)) / (state.frame.w * state.frame.h);
2351
- const est = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
2352
- rangeStr = `~${Math.round(est)}m (Est)`;
2353
- }
2354
  }
 
2355
 
2356
  if (det.gpt_direction) {
2357
  bearingStr = det.gpt_direction;
@@ -2626,9 +2607,13 @@
2626
  baseRange_m: d.baseRange_m || +rangeBase.value,
2627
  baseDwell_s: d.baseDwell_s || 4.0,
2628
  reqP_kW: d.reqP_kW || 35,
2629
- depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
2630
  depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2631
- depth_valid: d.depth_valid === true,
 
 
 
 
2632
  lastSeen: now(),
2633
  vx: 0, vy: 0,
2634
  dwellAccum: 0,
@@ -2660,9 +2645,13 @@
2660
  baseRange_m: +rangeBase.value,
2661
  baseDwell_s: 5.0,
2662
  reqP_kW: 40,
2663
- depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
2664
  depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2665
- depth_valid: d.depth_valid === true,
 
 
 
 
2666
  lastSeen: now(),
2667
  vx: 0, vy: 0,
2668
  dwellAccum: 0,
@@ -2756,9 +2745,7 @@
2756
  bbox: normBBox(d.bbox, w, h),
2757
  label: d.class,
2758
  score: d.score,
2759
- depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
2760
- depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2761
- depth_valid: d.depth_valid === true
2762
  }));
2763
 
2764
  // mark all tracks as unmatched
@@ -2797,10 +2784,9 @@
2797
 
2798
  tr.label = best.label || tr.label;
2799
  tr.score = best.score || tr.score;
2800
- if (best.depth_valid && Number.isFinite(best.depth_est_m)) {
2801
- tr.depth_est_m = best.depth_est_m;
2802
- tr.depth_rel = Number.isFinite(best.depth_rel) ? best.depth_rel : tr.depth_rel;
2803
- tr.depth_valid = true;
2804
  }
2805
  tr.lastSeen = now();
2806
  }
@@ -2825,9 +2811,13 @@
2825
  baseRange_m: +rangeBase.value,
2826
  baseDwell_s: 5.5,
2827
  reqP_kW: 42,
2828
- depth_est_m: detObjs[i].depth_est_m,
2829
  depth_rel: detObjs[i].depth_rel,
2830
- depth_valid: detObjs[i].depth_valid,
 
 
 
 
2831
  lastSeen: now(),
2832
  vx: 0, vy: 0,
2833
  dwellAccum: 0,
@@ -2854,12 +2844,14 @@
2854
  }
2855
 
2856
  function hasValidDepth(item) {
2857
- return item && item.depth_valid === true && Number.isFinite(item.depth_est_m);
 
2858
  }
2859
 
2860
  function getDisplayRange(item, fallbackRange) {
2861
- if (hasValidDepth(item)) {
2862
- return { range: item.depth_est_m, source: "depth" };
 
2863
  }
2864
  return { range: fallbackRange, source: "area" };
2865
  }
@@ -2934,16 +2926,12 @@
2934
  }
2935
 
2936
  function getTrackDisplayRange(track) {
2937
- // Priority: GPT > DepthModel (if enabled) > fallback
2938
  if (track.gpt_distance_m) {
2939
  return { range: track.gpt_distance_m, source: "GPT" };
2940
  }
2941
- // User requested removing "calculated" depth.
2942
- // We still support depth model if valid (optional).
2943
- if (hasValidDepth(track)) {
2944
- return { range: track.depth_est_m, source: "Lidar" };
2945
- }
2946
- return { range: null, source: "Wait..." };
2947
  }
2948
 
2949
  function dwellFromRange(track, range_m) {
@@ -3311,18 +3299,11 @@
3311
  let rangeVal = 3000; // default max scale in meters
3312
  let dist = 1000; // default unknown
3313
 
3314
- if (det.gpt_distance_m) dist = det.gpt_distance_m;
3315
- else if (det.depth_est_m) dist = det.depth_est_m;
3316
- else if (det.box) {
3317
- // Fallback area heuristic
3318
- // In hfAsync, we stored bbox: {x,y,w,h} in PIXELS.
3319
-
3320
- // We need frame dimensions.
3321
- const fw = state.frame.w || 1280;
3322
- const fh = state.frame.h || 720;
3323
- const area = (det.bbox.w * det.bbox.h) / (fw * fh);
3324
- // Heuristic: Area 0.01 => ~2000m, Area 0.5 => ~280m
3325
- dist = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
3326
  }
3327
 
3328
  // Log scale or Linear? Linear is easier for users to map.
 
989
  bbox: { x: x1, y: y1, w: w, h: h },
990
  aim: { ...ap },
991
  features: null,
992
+ baseRange_m: d.gpt_distance_m || null, // GPT is sole source of distance
993
  baseAreaFrac: null,
994
  baseDwell_s: null,
995
  reqP_kW: null,
996
  maxP_kW: null,
997
  pkill: null,
998
+ // GPT properties - sole source of distance estimation
999
  gpt_distance_m: d.gpt_distance_m,
1000
  gpt_direction: d.gpt_direction,
1001
  gpt_description: d.gpt_description,
1002
+ // Depth visualization only (not for distance)
1003
+ depth_rel: d.depth_rel
 
1004
  };
1005
  });
1006
  // Update UI components
 
1874
  const y1 = bbox[1] || 0;
1875
  const x2 = bbox[2] || 0;
1876
  const y2 = bbox[3] || 0;
 
1877
  const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
 
1878
  return {
1879
  bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
1880
  class: d.label || "drone",
1881
  score: d.score ?? 0,
1882
+ depth_rel: depthRel // Visualization only, GPT handles distance
 
 
1883
  };
1884
  });
1885
  }
 
1899
  const y1 = bbox[1] || 0;
1900
  const x2 = bbox[2] || 0;
1901
  const y2 = bbox[3] || 0;
 
1902
  const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
 
1903
  return {
1904
  bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
1905
  class: d.label || "object",
1906
  score: d.score ?? 0,
1907
+ depth_rel: depthRel // Visualization only, GPT handles distance
 
 
1908
  };
1909
  });
1910
  }
 
2115
  reqP_kW: null,
2116
  maxP_kW: null,
2117
  pkill: null,
2118
+ // Depth visualization only, GPT handles distance
2119
+ depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null
 
2120
  };
2121
  });
2122
 
 
2331
 
2332
  if (det.gpt_distance_m) {
2333
  rangeStr = `${det.gpt_distance_m}m (GPT)`;
 
 
 
 
 
 
 
 
 
 
2334
  }
2335
+ // No depth_est_m fallback - GPT is the sole source of distance
2336
 
2337
  if (det.gpt_direction) {
2338
  bearingStr = det.gpt_direction;
 
2607
  baseRange_m: d.baseRange_m || +rangeBase.value,
2608
  baseDwell_s: d.baseDwell_s || 4.0,
2609
  reqP_kW: d.reqP_kW || 35,
2610
+ // Depth visualization (keep for depth view toggle)
2611
  depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2612
+ // GPT properties - the sole source of distance estimation
2613
+ gpt_distance_m: d.gpt_distance_m || null,
2614
+ gpt_direction: d.gpt_direction || null,
2615
+ gpt_description: d.gpt_description || null,
2616
+ // Track state
2617
  lastSeen: now(),
2618
  vx: 0, vy: 0,
2619
  dwellAccum: 0,
 
2645
  baseRange_m: +rangeBase.value,
2646
  baseDwell_s: 5.0,
2647
  reqP_kW: 40,
2648
+ // Depth visualization only, GPT handles distance
2649
  depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
2650
+ // GPT properties
2651
+ gpt_distance_m: d.gpt_distance_m || null,
2652
+ gpt_direction: d.gpt_direction || null,
2653
+ gpt_description: d.gpt_description || null,
2654
+ // Track state
2655
  lastSeen: now(),
2656
  vx: 0, vy: 0,
2657
  dwellAccum: 0,
 
2745
  bbox: normBBox(d.bbox, w, h),
2746
  label: d.class,
2747
  score: d.score,
2748
+ depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null // Visualization only
 
 
2749
  }));
2750
 
2751
  // mark all tracks as unmatched
 
2784
 
2785
  tr.label = best.label || tr.label;
2786
  tr.score = best.score || tr.score;
2787
+ // Update depth visualization (not for distance)
2788
+ if (Number.isFinite(best.depth_rel)) {
2789
+ tr.depth_rel = best.depth_rel;
 
2790
  }
2791
  tr.lastSeen = now();
2792
  }
 
2811
  baseRange_m: +rangeBase.value,
2812
  baseDwell_s: 5.5,
2813
  reqP_kW: 42,
2814
+ // Depth visualization only, GPT handles distance
2815
  depth_rel: detObjs[i].depth_rel,
2816
+ // GPT properties (will be populated by updateTracksWithGPT)
2817
+ gpt_distance_m: null,
2818
+ gpt_direction: null,
2819
+ gpt_description: null,
2820
+ // Track state
2821
  lastSeen: now(),
2822
  vx: 0, vy: 0,
2823
  dwellAccum: 0,
 
2844
  }
2845
 
2846
  function hasValidDepth(item) {
2847
+ // Only used for depth VIEW toggle, not distance
2848
+ return item && Number.isFinite(item.depth_rel);
2849
  }
2850
 
2851
  function getDisplayRange(item, fallbackRange) {
2852
+ // GPT is the ONLY source of distance
2853
+ if (item && item.gpt_distance_m) {
2854
+ return { range: item.gpt_distance_m, source: "GPT" };
2855
  }
2856
  return { range: fallbackRange, source: "area" };
2857
  }
 
2926
  }
2927
 
2928
  function getTrackDisplayRange(track) {
2929
+ // GPT is the ONLY source of distance estimation
2930
  if (track.gpt_distance_m) {
2931
  return { range: track.gpt_distance_m, source: "GPT" };
2932
  }
2933
+ // No fallback - return null if GPT hasn't provided distance yet
2934
+ return { range: null, source: null };
 
 
 
 
2935
  }
2936
 
2937
  function dwellFromRange(track, range_m) {
 
3299
  let rangeVal = 3000; // default max scale in meters
3300
  let dist = 1000; // default unknown
3301
 
3302
+ if (det.gpt_distance_m) {
3303
+ dist = det.gpt_distance_m;
3304
+ } else {
3305
+ // No GPT yet - show at far distance (unknown)
3306
+ dist = 3000;
 
 
 
 
 
 
 
3307
  }
3308
 
3309
  // Log scale or Linear? Linear is easier for users to map.
app.py CHANGED
@@ -58,6 +58,12 @@ from utils.gpt_distance import estimate_distance_gpt
58
 
59
  logging.basicConfig(level=logging.INFO)
60
 
 
 
 
 
 
 
61
 
62
  async def _periodic_cleanup() -> None:
63
  while True:
 
58
 
59
  logging.basicConfig(level=logging.INFO)
60
 
61
+ # Suppress noisy external libraries
62
+ logging.getLogger("httpx").setLevel(logging.WARNING)
63
+ logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
64
+ logging.getLogger("transformers").setLevel(logging.WARNING)
65
+
66
+
67
 
68
  async def _periodic_cleanup() -> None:
69
  while True:
inference.py CHANGED
@@ -203,9 +203,10 @@ def _attach_depth_metrics(
203
  frame: np.ndarray,
204
  detections: List[Dict[str, Any]],
205
  depth_estimator_name: Optional[str],
206
- depth_scale: float,
207
  estimator_instance: Optional[Any] = None,
208
  ) -> None:
 
209
  if not detections or (not depth_estimator_name and not estimator_instance):
210
  return
211
 
@@ -222,7 +223,7 @@ def _attach_depth_metrics(
222
  else:
223
  estimator = load_depth_estimator(depth_estimator_name)
224
  lock = _get_model_lock("depth", estimator.name)
225
-
226
  with lock:
227
  depth_result = estimator.predict(frame)
228
 
@@ -231,12 +232,10 @@ def _attach_depth_metrics(
231
  return
232
 
233
  height, width = depth_map.shape[:2]
234
- valid_depths: List[float] = []
235
 
236
  for det in detections:
237
- det["depth_est_m"] = None
238
- det["depth_rel"] = None
239
- det["depth_valid"] = False
240
 
241
  bbox = det.get("bbox")
242
  if not bbox or len(bbox) < 4:
@@ -251,13 +250,13 @@ def _attach_depth_metrics(
251
  patch = depth_map[y1:y2, x1:x2]
252
  if patch.size == 0:
253
  continue
254
-
255
  # Center crop (50%) to avoid background
256
  h_p, w_p = patch.shape
257
  cy, cx = h_p // 2, w_p // 2
258
  dy, dx = h_p // 4, w_p // 4
259
  center_patch = patch[cy - dy : cy + dy, cx - dx : cx + dx]
260
-
261
  # Fallback to full patch if center is empty (unlikely)
262
  if center_patch.size == 0:
263
  center_patch = patch
@@ -267,32 +266,20 @@ def _attach_depth_metrics(
267
  continue
268
 
269
  depth_raw = float(np.median(finite))
270
- if depth_raw <= 1e-6:
271
- det["depth_est_m"] = None
272
- det["depth_valid"] = False
273
- continue
274
-
275
- # Inverted depth: closer objects have higher raw values
276
- # Distance = Scale / RawValue
277
- try:
278
- depth_est = depth_scale / depth_raw
279
- except ZeroDivisionError:
280
- continue
281
-
282
- det["depth_est_m"] = depth_est
283
- det["depth_valid"] = True
284
- valid_depths.append(depth_est)
285
 
286
- if not valid_depths:
287
  return
288
 
289
- min_depth = float(min(valid_depths))
290
- max_depth = float(max(valid_depths))
291
- denom = max(max_depth - min_depth, 1e-6)
 
292
 
293
- for det in detections:
294
- if det.get("depth_valid"):
295
- det["depth_rel"] = (float(det["depth_est_m"]) - min_depth) / denom
296
 
297
 
298
  def infer_frame(
@@ -330,13 +317,13 @@ def infer_frame(
330
  except Exception:
331
  logging.exception("Depth estimation failed for frame")
332
 
333
- # Re-build display labels to incude depth if available
334
  display_labels = []
335
  for i, det in enumerate(detections):
336
  label = det["label"]
337
- if det.get("depth_valid") and det.get("depth_est_m") is not None:
338
- # Add depth to label, e.g. "car 12m"
339
- depth_str = f"{int(det['depth_est_m'])}m"
340
  label = f"{label} {depth_str}"
341
  logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
342
  display_labels.append(label)
@@ -431,23 +418,22 @@ def infer_batch(
431
  return outputs
432
 
433
  def _build_display_label(det):
 
434
  label = det["label"]
435
- if det.get("depth_valid") and det.get("depth_est_m") is not None:
436
- depth_str = f"{int(det['depth_est_m'])}m"
437
- label = f"{label} {depth_str}"
438
  return label
439
 
440
  def _attach_depth_from_result(detections, depth_result, depth_scale):
 
441
  depth_map = depth_result.depth_map
442
  if depth_map is None or depth_map.size == 0: return
443
-
444
  height, width = depth_map.shape[:2]
445
- valid_depths = []
446
-
447
  for det in detections:
448
- det["depth_est_m"] = None
449
- det["depth_rel"] = None
450
- det["depth_valid"] = False
451
 
452
  bbox = det.get("bbox")
453
  if not bbox or len(bbox) < 4: continue
@@ -460,7 +446,7 @@ def _attach_depth_from_result(detections, depth_result, depth_scale):
460
 
461
  patch = depth_map[y1:y2, x1:x2]
462
  if patch.size == 0: continue
463
-
464
  h_p, w_p = patch.shape
465
  cy, cx = h_p // 2, w_p // 2
466
  dy, dx = h_p // 4, w_p // 4
@@ -471,29 +457,19 @@ def _attach_depth_from_result(detections, depth_result, depth_scale):
471
  if finite.size == 0: continue
472
 
473
  depth_raw = float(np.median(finite))
474
- if depth_raw <= 1e-6:
475
- det["depth_est_m"] = None
476
- det["depth_valid"] = False
477
- continue
478
 
479
- try:
480
- depth_est = depth_scale / depth_raw
481
- except ZeroDivisionError:
482
- continue
483
-
484
- det["depth_est_m"] = depth_est
485
- det["depth_valid"] = True
486
- valid_depths.append(depth_est)
487
 
488
- if not valid_depths: return
 
 
 
489
 
490
- min_depth = float(min(valid_depths))
491
- max_depth = float(max(valid_depths))
492
- denom = max(max_depth - min_depth, 1e-6)
493
-
494
- for det in detections:
495
- if det.get("depth_valid"):
496
- det["depth_rel"] = (float(det["depth_est_m"]) - min_depth) / denom
497
 
498
 
499
  def infer_segmentation_frame(
@@ -593,11 +569,7 @@ def process_first_frame(
593
  det["gpt_distance_m"] = info.get("distance_m")
594
  det["gpt_direction"] = info.get("direction")
595
  det["gpt_description"] = info.get("description")
596
-
597
- # Also populate standard display fields if legacy depth is off or missing
598
- if not det.get("depth_est_m"):
599
- det["depth_est_m"] = info.get("distance_m") # Polyfill for UI
600
- # We might want to distinguish source later
601
 
602
  except Exception as e:
603
  logging.error(f"GPT Distance estimation failed: {e}")
@@ -1272,8 +1244,8 @@ def run_depth_inference(
1272
  for d in frame_dets:
1273
  boxes.append(d.get("bbox"))
1274
  lbl = d.get("label", "obj")
1275
- if d.get("depth_est_m"):
1276
- lbl = f"{lbl} {int(d['depth_est_m'])}m"
1277
  labels.append(lbl)
1278
  colored = draw_boxes(colored, boxes=boxes, label_names=labels)
1279
 
 
203
  frame: np.ndarray,
204
  detections: List[Dict[str, Any]],
205
  depth_estimator_name: Optional[str],
206
+ depth_scale: float, # No longer used for distance calculation
207
  estimator_instance: Optional[Any] = None,
208
  ) -> None:
209
+ """Attach relative depth values for visualization only. GPT handles distance estimation."""
210
  if not detections or (not depth_estimator_name and not estimator_instance):
211
  return
212
 
 
223
  else:
224
  estimator = load_depth_estimator(depth_estimator_name)
225
  lock = _get_model_lock("depth", estimator.name)
226
+
227
  with lock:
228
  depth_result = estimator.predict(frame)
229
 
 
232
  return
233
 
234
  height, width = depth_map.shape[:2]
235
+ raw_depths: List[Tuple[Dict[str, Any], float]] = []
236
 
237
  for det in detections:
238
+ det["depth_rel"] = None # Relative depth for visualization only
 
 
239
 
240
  bbox = det.get("bbox")
241
  if not bbox or len(bbox) < 4:
 
250
  patch = depth_map[y1:y2, x1:x2]
251
  if patch.size == 0:
252
  continue
253
+
254
  # Center crop (50%) to avoid background
255
  h_p, w_p = patch.shape
256
  cy, cx = h_p // 2, w_p // 2
257
  dy, dx = h_p // 4, w_p // 4
258
  center_patch = patch[cy - dy : cy + dy, cx - dx : cx + dx]
259
+
260
  # Fallback to full patch if center is empty (unlikely)
261
  if center_patch.size == 0:
262
  center_patch = patch
 
266
  continue
267
 
268
  depth_raw = float(np.median(finite))
269
+ if depth_raw > 1e-6:
270
+ raw_depths.append((det, depth_raw))
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
+ if not raw_depths:
273
  return
274
 
275
+ # Compute relative depth (0-1) for visualization only
276
+ all_raw = [d[1] for d in raw_depths]
277
+ min_raw, max_raw = min(all_raw), max(all_raw)
278
+ denom = max(max_raw - min_raw, 1e-6)
279
 
280
+ for det, depth_raw in raw_depths:
281
+ # Inverted: higher raw = closer = lower rel value (0=close, 1=far)
282
+ det["depth_rel"] = 1.0 - ((depth_raw - min_raw) / denom)
283
 
284
 
285
  def infer_frame(
 
317
  except Exception:
318
  logging.exception("Depth estimation failed for frame")
319
 
320
+ # Re-build display labels to include GPT distance if available
321
  display_labels = []
322
  for i, det in enumerate(detections):
323
  label = det["label"]
324
+ if det.get("gpt_distance_m") is not None:
325
+ # Add GPT distance to label, e.g. "car 12m"
326
+ depth_str = f"{int(det['gpt_distance_m'])}m"
327
  label = f"{label} {depth_str}"
328
  logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
329
  display_labels.append(label)
 
418
  return outputs
419
 
420
  def _build_display_label(det):
421
+ """Build display label with GPT distance if available."""
422
  label = det["label"]
423
+ if det.get("gpt_distance_m") is not None:
424
+ label = f"{label} {int(det['gpt_distance_m'])}m"
 
425
  return label
426
 
427
  def _attach_depth_from_result(detections, depth_result, depth_scale):
428
+ """Attach relative depth values for visualization only. GPT handles distance estimation."""
429
  depth_map = depth_result.depth_map
430
  if depth_map is None or depth_map.size == 0: return
431
+
432
  height, width = depth_map.shape[:2]
433
+ raw_depths = []
434
+
435
  for det in detections:
436
+ det["depth_rel"] = None # Relative depth for visualization only
 
 
437
 
438
  bbox = det.get("bbox")
439
  if not bbox or len(bbox) < 4: continue
 
446
 
447
  patch = depth_map[y1:y2, x1:x2]
448
  if patch.size == 0: continue
449
+
450
  h_p, w_p = patch.shape
451
  cy, cx = h_p // 2, w_p // 2
452
  dy, dx = h_p // 4, w_p // 4
 
457
  if finite.size == 0: continue
458
 
459
  depth_raw = float(np.median(finite))
460
+ if depth_raw > 1e-6:
461
+ raw_depths.append((det, depth_raw))
 
 
462
 
463
+ if not raw_depths: return
 
 
 
 
 
 
 
464
 
465
+ # Compute relative depth (0-1) for visualization only
466
+ all_raw = [d[1] for d in raw_depths]
467
+ min_raw, max_raw = min(all_raw), max(all_raw)
468
+ denom = max(max_raw - min_raw, 1e-6)
469
 
470
+ for det, depth_raw in raw_depths:
471
+ # Inverted: higher raw = closer = lower rel value (0=close, 1=far)
472
+ det["depth_rel"] = 1.0 - ((depth_raw - min_raw) / denom)
 
 
 
 
473
 
474
 
475
  def infer_segmentation_frame(
 
569
  det["gpt_distance_m"] = info.get("distance_m")
570
  det["gpt_direction"] = info.get("direction")
571
  det["gpt_description"] = info.get("description")
572
+ # GPT is the sole source of distance - no polyfill needed
 
 
 
 
573
 
574
  except Exception as e:
575
  logging.error(f"GPT Distance estimation failed: {e}")
 
1244
  for d in frame_dets:
1245
  boxes.append(d.get("bbox"))
1246
  lbl = d.get("label", "obj")
1247
+ if d.get("gpt_distance_m"):
1248
+ lbl = f"{lbl} {int(d['gpt_distance_m'])}m"
1249
  labels.append(lbl)
1250
  colored = draw_boxes(colored, boxes=boxes, label_names=labels)
1251