Spaces:

BiasLab2025
/

perception

Sleeping

Zhen Ye committed 12 days ago

Commit

64bbe44

1 Parent(s): 5c36daa

Refactor: Decouple depth estimator and suppress noisy logs

- Backend: Remove heuristic depth calculation; depth model now only outputs relative depth for visualization.
- Frontend: Establish GPT as the sole authority for track distances.
- Config: Reduce httpx/huggingface logging noise to WARNING level.

Files changed (3) hide show

LaserPerception/LaserPerception.js +44 -63
app.py +6 -0
inference.py +43 -71

LaserPerception/LaserPerception.js CHANGED Viewed

@@ -989,19 +989,18 @@
                         bbox: { x: x1, y: y1, w: w, h: h },
                         aim: { ...ap },
                         features: null,
-                        baseRange_m: d.gpt_distance_m || d.depth_est_m || null, // Priority to GPT/Depth
                         baseAreaFrac: null,
                         baseDwell_s: null,
                         reqP_kW: null,
                         maxP_kW: null,
                         pkill: null,
-                        // Pass through backend props
                         gpt_distance_m: d.gpt_distance_m,
                         gpt_direction: d.gpt_direction,
                         gpt_description: d.gpt_description,
-                        depth_est_m: d.depth_est_m,
-                        depth_rel: d.depth_rel,
-                        depth_valid: !!(d.depth_est_m || d.gpt_distance_m)
                     };
                 });
                 // Update UI components
@@ -1875,16 +1874,12 @@
                     const y1 = bbox[1] || 0;
                     const x2 = bbox[2] || 0;
                     const y2 = bbox[3] || 0;
-                    const depthEst = Number.isFinite(d.depth_est_m) ? d.depth_est_m : null;
                     const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
-                    const depthValid = d.depth_valid === true && depthEst !== null;
                     return {
                         bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
                         class: d.label || "drone",
                         score: d.score ?? 0,
-                        depth_est_m: depthEst,
-                        depth_rel: depthRel,
-                        depth_valid: depthValid
                     };
                 });
             }
@@ -1904,16 +1899,12 @@
                         const y1 = bbox[1] || 0;
                         const x2 = bbox[2] || 0;
                         const y2 = bbox[3] || 0;
-                        const depthEst = Number.isFinite(d.depth_est_m) ? d.depth_est_m : null;
                         const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
-                        const depthValid = d.depth_valid === true && depthEst !== null;
                         return {
                             bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
                             class: d.label || "object",
                             score: d.score ?? 0,
-                            depth_est_m: depthEst,
-                            depth_rel: depthRel,
-                            depth_valid: depthValid
                         };
                     });
                 }
@@ -2124,9 +2115,8 @@
                     reqP_kW: null,
                     maxP_kW: null,
                     pkill: null,
-                    depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
-                    depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
-                    depth_valid: d.depth_valid === true
                 };
             });
@@ -2341,17 +2331,8 @@
             if (det.gpt_distance_m) {
                 rangeStr = `${det.gpt_distance_m}m (GPT)`;
-            } else if (det.depth_est_m) {
-                rangeStr = `${Math.round(det.depth_est_m)}m (Lidar)`;
-            } else {
-                // Fallback
-                if (det.box) {
-                    const [x1, y1, x2, y2] = det.box;
-                    const area = ((x2 - x1) * (y2 - y1)) / (state.frame.w * state.frame.h);
-                    const est = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
-                    rangeStr = `~${Math.round(est)}m (Est)`;
-                }
             }
             if (det.gpt_direction) {
                 bearingStr = det.gpt_direction;
@@ -2626,9 +2607,13 @@
                 baseRange_m: d.baseRange_m || +rangeBase.value,
                 baseDwell_s: d.baseDwell_s || 4.0,
                 reqP_kW: d.reqP_kW || 35,
-                depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
                 depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
-                depth_valid: d.depth_valid === true,
                 lastSeen: now(),
                 vx: 0, vy: 0,
                 dwellAccum: 0,
@@ -2660,9 +2645,13 @@
                 baseRange_m: +rangeBase.value,
                 baseDwell_s: 5.0,
                 reqP_kW: 40,
-                depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
                 depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
-                depth_valid: d.depth_valid === true,
                 lastSeen: now(),
                 vx: 0, vy: 0,
                 dwellAccum: 0,
@@ -2756,9 +2745,7 @@
             bbox: normBBox(d.bbox, w, h),
             label: d.class,
             score: d.score,
-            depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
-            depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
-            depth_valid: d.depth_valid === true
         }));
         // mark all tracks as unmatched
@@ -2797,10 +2784,9 @@
                 tr.label = best.label || tr.label;
                 tr.score = best.score || tr.score;
-                if (best.depth_valid && Number.isFinite(best.depth_est_m)) {
-                    tr.depth_est_m = best.depth_est_m;
-                    tr.depth_rel = Number.isFinite(best.depth_rel) ? best.depth_rel : tr.depth_rel;
-                    tr.depth_valid = true;
                 }
                 tr.lastSeen = now();
             }
@@ -2825,9 +2811,13 @@
                 baseRange_m: +rangeBase.value,
                 baseDwell_s: 5.5,
                 reqP_kW: 42,
-                depth_est_m: detObjs[i].depth_est_m,
                 depth_rel: detObjs[i].depth_rel,
-                depth_valid: detObjs[i].depth_valid,
                 lastSeen: now(),
                 vx: 0, vy: 0,
                 dwellAccum: 0,
@@ -2854,12 +2844,14 @@
     }
     function hasValidDepth(item) {
-        return item && item.depth_valid === true && Number.isFinite(item.depth_est_m);
     }
     function getDisplayRange(item, fallbackRange) {
-        if (hasValidDepth(item)) {
-            return { range: item.depth_est_m, source: "depth" };
         }
         return { range: fallbackRange, source: "area" };
     }
@@ -2934,16 +2926,12 @@
     }
     function getTrackDisplayRange(track) {
-        // Priority: GPT > DepthModel (if enabled) > fallback
         if (track.gpt_distance_m) {
             return { range: track.gpt_distance_m, source: "GPT" };
         }
-        // User requested removing "calculated" depth.
-        // We still support depth model if valid (optional).
-        if (hasValidDepth(track)) {
-            return { range: track.depth_est_m, source: "Lidar" };
-        }
-        return { range: null, source: "Wait..." };
     }
     function dwellFromRange(track, range_m) {
@@ -3311,18 +3299,11 @@
                 let rangeVal = 3000; // default max scale in meters
                 let dist = 1000; // default unknown
-                if (det.gpt_distance_m) dist = det.gpt_distance_m;
-                else if (det.depth_est_m) dist = det.depth_est_m;
-                else if (det.box) {
-                    // Fallback area heuristic
-                    // In hfAsync, we stored bbox: {x,y,w,h} in PIXELS.
-                    // We need frame dimensions.
-                    const fw = state.frame.w || 1280;
-                    const fh = state.frame.h || 720;
-                    const area = (det.bbox.w * det.bbox.h) / (fw * fh);
-                    // Heuristic: Area 0.01 => ~2000m, Area 0.5 => ~280m
-                    dist = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
                 }
                 // Log scale or Linear? Linear is easier for users to map.

                         bbox: { x: x1, y: y1, w: w, h: h },
                         aim: { ...ap },
                         features: null,
+                        baseRange_m: d.gpt_distance_m || null, // GPT is sole source of distance
                         baseAreaFrac: null,
                         baseDwell_s: null,
                         reqP_kW: null,
                         maxP_kW: null,
                         pkill: null,
+                        // GPT properties - sole source of distance estimation
                         gpt_distance_m: d.gpt_distance_m,
                         gpt_direction: d.gpt_direction,
                         gpt_description: d.gpt_description,
+                        // Depth visualization only (not for distance)
+                        depth_rel: d.depth_rel
                     };
                 });
                 // Update UI components
                     const y1 = bbox[1] || 0;
                     const x2 = bbox[2] || 0;
                     const y2 = bbox[3] || 0;
                     const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
                     return {
                         bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
                         class: d.label || "drone",
                         score: d.score ?? 0,
+                        depth_rel: depthRel  // Visualization only, GPT handles distance
                     };
                 });
             }
                         const y1 = bbox[1] || 0;
                         const x2 = bbox[2] || 0;
                         const y2 = bbox[3] || 0;
                         const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
                         return {
                             bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
                             class: d.label || "object",
                             score: d.score ?? 0,
+                            depth_rel: depthRel  // Visualization only, GPT handles distance
                         };
                     });
                 }
                     reqP_kW: null,
                     maxP_kW: null,
                     pkill: null,
+                    // Depth visualization only, GPT handles distance
+                    depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null
                 };
             });
             if (det.gpt_distance_m) {
                 rangeStr = `${det.gpt_distance_m}m (GPT)`;
             }
+            // No depth_est_m fallback - GPT is the sole source of distance
             if (det.gpt_direction) {
                 bearingStr = det.gpt_direction;
                 baseRange_m: d.baseRange_m || +rangeBase.value,
                 baseDwell_s: d.baseDwell_s || 4.0,
                 reqP_kW: d.reqP_kW || 35,
+                // Depth visualization (keep for depth view toggle)
                 depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
+                // GPT properties - the sole source of distance estimation
+                gpt_distance_m: d.gpt_distance_m || null,
+                gpt_direction: d.gpt_direction || null,
+                gpt_description: d.gpt_description || null,
+                // Track state
                 lastSeen: now(),
                 vx: 0, vy: 0,
                 dwellAccum: 0,
                 baseRange_m: +rangeBase.value,
                 baseDwell_s: 5.0,
                 reqP_kW: 40,
+                // Depth visualization only, GPT handles distance
                 depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
+                // GPT properties
+                gpt_distance_m: d.gpt_distance_m || null,
+                gpt_direction: d.gpt_direction || null,
+                gpt_description: d.gpt_description || null,
+                // Track state
                 lastSeen: now(),
                 vx: 0, vy: 0,
                 dwellAccum: 0,
             bbox: normBBox(d.bbox, w, h),
             label: d.class,
             score: d.score,
+            depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null  // Visualization only
         }));
         // mark all tracks as unmatched
                 tr.label = best.label || tr.label;
                 tr.score = best.score || tr.score;
+                // Update depth visualization (not for distance)
+                if (Number.isFinite(best.depth_rel)) {
+                    tr.depth_rel = best.depth_rel;
                 }
                 tr.lastSeen = now();
             }
                 baseRange_m: +rangeBase.value,
                 baseDwell_s: 5.5,
                 reqP_kW: 42,
+                // Depth visualization only, GPT handles distance
                 depth_rel: detObjs[i].depth_rel,
+                // GPT properties (will be populated by updateTracksWithGPT)
+                gpt_distance_m: null,
+                gpt_direction: null,
+                gpt_description: null,
+                // Track state
                 lastSeen: now(),
                 vx: 0, vy: 0,
                 dwellAccum: 0,
     }
     function hasValidDepth(item) {
+        // Only used for depth VIEW toggle, not distance
+        return item && Number.isFinite(item.depth_rel);
     }
     function getDisplayRange(item, fallbackRange) {
+        // GPT is the ONLY source of distance
+        if (item && item.gpt_distance_m) {
+            return { range: item.gpt_distance_m, source: "GPT" };
         }
         return { range: fallbackRange, source: "area" };
     }
     }
     function getTrackDisplayRange(track) {
+        // GPT is the ONLY source of distance estimation
         if (track.gpt_distance_m) {
             return { range: track.gpt_distance_m, source: "GPT" };
         }
+        // No fallback - return null if GPT hasn't provided distance yet
+        return { range: null, source: null };
     }
     function dwellFromRange(track, range_m) {
                 let rangeVal = 3000; // default max scale in meters
                 let dist = 1000; // default unknown
+                if (det.gpt_distance_m) {
+                    dist = det.gpt_distance_m;
+                } else {
+                    // No GPT yet - show at far distance (unknown)
+                    dist = 3000;
                 }
                 // Log scale or Linear? Linear is easier for users to map.

app.py CHANGED Viewed

@@ -58,6 +58,12 @@ from utils.gpt_distance import estimate_distance_gpt
 logging.basicConfig(level=logging.INFO)
 async def _periodic_cleanup() -> None:
     while True:

 logging.basicConfig(level=logging.INFO)
+# Suppress noisy external libraries
+logging.getLogger("httpx").setLevel(logging.WARNING)
+logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
+logging.getLogger("transformers").setLevel(logging.WARNING)
 async def _periodic_cleanup() -> None:
     while True:

inference.py CHANGED Viewed

@@ -203,9 +203,10 @@ def _attach_depth_metrics(
     frame: np.ndarray,
     detections: List[Dict[str, Any]],
     depth_estimator_name: Optional[str],
-    depth_scale: float,
     estimator_instance: Optional[Any] = None,
 ) -> None:
     if not detections or (not depth_estimator_name and not estimator_instance):
         return
@@ -222,7 +223,7 @@ def _attach_depth_metrics(
     else:
         estimator = load_depth_estimator(depth_estimator_name)
         lock = _get_model_lock("depth", estimator.name)
     with lock:
         depth_result = estimator.predict(frame)
@@ -231,12 +232,10 @@ def _attach_depth_metrics(
         return
     height, width = depth_map.shape[:2]
-    valid_depths: List[float] = []
     for det in detections:
-        det["depth_est_m"] = None
-        det["depth_rel"] = None
-        det["depth_valid"] = False
         bbox = det.get("bbox")
         if not bbox or len(bbox) < 4:
@@ -251,13 +250,13 @@ def _attach_depth_metrics(
         patch = depth_map[y1:y2, x1:x2]
         if patch.size == 0:
             continue
         # Center crop (50%) to avoid background
         h_p, w_p = patch.shape
         cy, cx = h_p // 2, w_p // 2
         dy, dx = h_p // 4, w_p // 4
         center_patch = patch[cy - dy : cy + dy, cx - dx : cx + dx]
         # Fallback to full patch if center is empty (unlikely)
         if center_patch.size == 0:
            center_patch = patch
@@ -267,32 +266,20 @@ def _attach_depth_metrics(
             continue
         depth_raw = float(np.median(finite))
-        if depth_raw <= 1e-6:
-            det["depth_est_m"] = None
-            det["depth_valid"] = False
-            continue
-        # Inverted depth: closer objects have higher raw values
-        # Distance = Scale / RawValue
-        try:
-            depth_est = depth_scale / depth_raw
-        except ZeroDivisionError:
-            continue
-        det["depth_est_m"] = depth_est
-        det["depth_valid"] = True
-        valid_depths.append(depth_est)
-    if not valid_depths:
         return
-    min_depth = float(min(valid_depths))
-    max_depth = float(max(valid_depths))
-    denom = max(max_depth - min_depth, 1e-6)
-    for det in detections:
-        if det.get("depth_valid"):
-            det["depth_rel"] = (float(det["depth_est_m"]) - min_depth) / denom
 def infer_frame(
@@ -330,13 +317,13 @@ def infer_frame(
             except Exception:
                 logging.exception("Depth estimation failed for frame")
-        # Re-build display labels to incude depth if available
         display_labels = []
         for i, det in enumerate(detections):
             label = det["label"]
-            if det.get("depth_valid") and det.get("depth_est_m") is not None:
-                # Add depth to label, e.g. "car 12m"
-                depth_str = f"{int(det['depth_est_m'])}m"
                 label = f"{label} {depth_str}"
                 logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
             display_labels.append(label)
@@ -431,23 +418,22 @@ def infer_batch(
     return outputs
 def _build_display_label(det):
     label = det["label"]
-    if det.get("depth_valid") and det.get("depth_est_m") is not None:
-         depth_str = f"{int(det['depth_est_m'])}m"
-         label = f"{label} {depth_str}"
     return label
 def _attach_depth_from_result(detections, depth_result, depth_scale):
     depth_map = depth_result.depth_map
     if depth_map is None or depth_map.size == 0: return
     height, width = depth_map.shape[:2]
-    valid_depths = []
     for det in detections:
-        det["depth_est_m"] = None
-        det["depth_rel"] = None
-        det["depth_valid"] = False
         bbox = det.get("bbox")
         if not bbox or len(bbox) < 4: continue
@@ -460,7 +446,7 @@ def _attach_depth_from_result(detections, depth_result, depth_scale):
         patch = depth_map[y1:y2, x1:x2]
         if patch.size == 0: continue
         h_p, w_p = patch.shape
         cy, cx = h_p // 2, w_p // 2
         dy, dx = h_p // 4, w_p // 4
@@ -471,29 +457,19 @@ def _attach_depth_from_result(detections, depth_result, depth_scale):
         if finite.size == 0: continue
         depth_raw = float(np.median(finite))
-        if depth_raw <= 1e-6:
-             det["depth_est_m"] = None
-             det["depth_valid"] = False
-             continue
-        try:
-            depth_est = depth_scale / depth_raw
-        except ZeroDivisionError:
-            continue
-        det["depth_est_m"] = depth_est
-        det["depth_valid"] = True
-        valid_depths.append(depth_est)
-    if not valid_depths: return
-    min_depth = float(min(valid_depths))
-    max_depth = float(max(valid_depths))
-    denom = max(max_depth - min_depth, 1e-6)
-    for det in detections:
-        if det.get("depth_valid"):
-            det["depth_rel"] = (float(det["depth_est_m"]) - min_depth) / denom
 def infer_segmentation_frame(
@@ -593,11 +569,7 @@ def process_first_frame(
                      det["gpt_distance_m"] = info.get("distance_m")
                      det["gpt_direction"] = info.get("direction")
                      det["gpt_description"] = info.get("description")
-                     # Also populate standard display fields if legacy depth is off or missing
-                     if not det.get("depth_est_m"):
-                         det["depth_est_m"] = info.get("distance_m") # Polyfill for UI
-                         # We might want to distinguish source later
         except Exception as e:
             logging.error(f"GPT Distance estimation failed: {e}")
@@ -1272,8 +1244,8 @@ def run_depth_inference(
                              for d in frame_dets:
                                  boxes.append(d.get("bbox"))
                                  lbl = d.get("label", "obj")
-                                 if d.get("depth_est_m"):
-                                     lbl = f"{lbl} {int(d['depth_est_m'])}m"
                                  labels.append(lbl)
                              colored = draw_boxes(colored, boxes=boxes, label_names=labels)

     frame: np.ndarray,
     detections: List[Dict[str, Any]],
     depth_estimator_name: Optional[str],
+    depth_scale: float,  # No longer used for distance calculation
     estimator_instance: Optional[Any] = None,
 ) -> None:
+    """Attach relative depth values for visualization only. GPT handles distance estimation."""
     if not detections or (not depth_estimator_name and not estimator_instance):
         return
     else:
         estimator = load_depth_estimator(depth_estimator_name)
         lock = _get_model_lock("depth", estimator.name)
     with lock:
         depth_result = estimator.predict(frame)
         return
     height, width = depth_map.shape[:2]
+    raw_depths: List[Tuple[Dict[str, Any], float]] = []
     for det in detections:
+        det["depth_rel"] = None  # Relative depth for visualization only
         bbox = det.get("bbox")
         if not bbox or len(bbox) < 4:
         patch = depth_map[y1:y2, x1:x2]
         if patch.size == 0:
             continue
         # Center crop (50%) to avoid background
         h_p, w_p = patch.shape
         cy, cx = h_p // 2, w_p // 2
         dy, dx = h_p // 4, w_p // 4
         center_patch = patch[cy - dy : cy + dy, cx - dx : cx + dx]
         # Fallback to full patch if center is empty (unlikely)
         if center_patch.size == 0:
            center_patch = patch
             continue
         depth_raw = float(np.median(finite))
+        if depth_raw > 1e-6:
+            raw_depths.append((det, depth_raw))
+    if not raw_depths:
         return
+    # Compute relative depth (0-1) for visualization only
+    all_raw = [d[1] for d in raw_depths]
+    min_raw, max_raw = min(all_raw), max(all_raw)
+    denom = max(max_raw - min_raw, 1e-6)
+    for det, depth_raw in raw_depths:
+        # Inverted: higher raw = closer = lower rel value (0=close, 1=far)
+        det["depth_rel"] = 1.0 - ((depth_raw - min_raw) / denom)
 def infer_frame(
             except Exception:
                 logging.exception("Depth estimation failed for frame")
+        # Re-build display labels to include GPT distance if available
         display_labels = []
         for i, det in enumerate(detections):
             label = det["label"]
+            if det.get("gpt_distance_m") is not None:
+                # Add GPT distance to label, e.g. "car 12m"
+                depth_str = f"{int(det['gpt_distance_m'])}m"
                 label = f"{label} {depth_str}"
                 logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
             display_labels.append(label)
     return outputs
 def _build_display_label(det):
+    """Build display label with GPT distance if available."""
     label = det["label"]
+    if det.get("gpt_distance_m") is not None:
+        label = f"{label} {int(det['gpt_distance_m'])}m"
     return label
 def _attach_depth_from_result(detections, depth_result, depth_scale):
+    """Attach relative depth values for visualization only. GPT handles distance estimation."""
     depth_map = depth_result.depth_map
     if depth_map is None or depth_map.size == 0: return
     height, width = depth_map.shape[:2]
+    raw_depths = []
     for det in detections:
+        det["depth_rel"] = None  # Relative depth for visualization only
         bbox = det.get("bbox")
         if not bbox or len(bbox) < 4: continue
         patch = depth_map[y1:y2, x1:x2]
         if patch.size == 0: continue
         h_p, w_p = patch.shape
         cy, cx = h_p // 2, w_p // 2
         dy, dx = h_p // 4, w_p // 4
         if finite.size == 0: continue
         depth_raw = float(np.median(finite))
+        if depth_raw > 1e-6:
+            raw_depths.append((det, depth_raw))
+    if not raw_depths: return
+    # Compute relative depth (0-1) for visualization only
+    all_raw = [d[1] for d in raw_depths]
+    min_raw, max_raw = min(all_raw), max(all_raw)
+    denom = max(max_raw - min_raw, 1e-6)
+    for det, depth_raw in raw_depths:
+        # Inverted: higher raw = closer = lower rel value (0=close, 1=far)
+        det["depth_rel"] = 1.0 - ((depth_raw - min_raw) / denom)
 def infer_segmentation_frame(
                      det["gpt_distance_m"] = info.get("distance_m")
                      det["gpt_direction"] = info.get("direction")
                      det["gpt_description"] = info.get("description")
+                     # GPT is the sole source of distance - no polyfill needed
         except Exception as e:
             logging.error(f"GPT Distance estimation failed: {e}")
                              for d in frame_dets:
                                  boxes.append(d.get("bbox"))
                                  lbl = d.get("label", "obj")
+                                 if d.get("gpt_distance_m"):
+                                     lbl = f"{lbl} {int(d['gpt_distance_m'])}m"
                                  labels.append(lbl)
                              colored = draw_boxes(colored, boxes=boxes, label_names=labels)