Spaces:
Sleeping
Sleeping
Zhen Ye
committed on
Commit
·
64bbe44
1
Parent(s):
5c36daa
Refactor: Decouple depth estimator and suppress noisy logs
Browse files
- Backend: Remove heuristic depth calculation; depth model now only outputs relative depth for visualization.
- Frontend: Establish GPT as the sole authority for track distances.
- Config: Reduce httpx/huggingface logging noise to WARNING level.
- LaserPerception/LaserPerception.js +44 -63
- app.py +6 -0
- inference.py +43 -71
LaserPerception/LaserPerception.js
CHANGED
|
@@ -989,19 +989,18 @@
|
|
| 989 |
bbox: { x: x1, y: y1, w: w, h: h },
|
| 990 |
aim: { ...ap },
|
| 991 |
features: null,
|
| 992 |
-
baseRange_m: d.gpt_distance_m ||
|
| 993 |
baseAreaFrac: null,
|
| 994 |
baseDwell_s: null,
|
| 995 |
reqP_kW: null,
|
| 996 |
maxP_kW: null,
|
| 997 |
pkill: null,
|
| 998 |
-
//
|
| 999 |
gpt_distance_m: d.gpt_distance_m,
|
| 1000 |
gpt_direction: d.gpt_direction,
|
| 1001 |
gpt_description: d.gpt_description,
|
| 1002 |
-
|
| 1003 |
-
depth_rel: d.depth_rel
|
| 1004 |
-
depth_valid: !!(d.depth_est_m || d.gpt_distance_m)
|
| 1005 |
};
|
| 1006 |
});
|
| 1007 |
// Update UI components
|
|
@@ -1875,16 +1874,12 @@
|
|
| 1875 |
const y1 = bbox[1] || 0;
|
| 1876 |
const x2 = bbox[2] || 0;
|
| 1877 |
const y2 = bbox[3] || 0;
|
| 1878 |
-
const depthEst = Number.isFinite(d.depth_est_m) ? d.depth_est_m : null;
|
| 1879 |
const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
|
| 1880 |
-
const depthValid = d.depth_valid === true && depthEst !== null;
|
| 1881 |
return {
|
| 1882 |
bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
|
| 1883 |
class: d.label || "drone",
|
| 1884 |
score: d.score ?? 0,
|
| 1885 |
-
|
| 1886 |
-
depth_rel: depthRel,
|
| 1887 |
-
depth_valid: depthValid
|
| 1888 |
};
|
| 1889 |
});
|
| 1890 |
}
|
|
@@ -1904,16 +1899,12 @@
|
|
| 1904 |
const y1 = bbox[1] || 0;
|
| 1905 |
const x2 = bbox[2] || 0;
|
| 1906 |
const y2 = bbox[3] || 0;
|
| 1907 |
-
const depthEst = Number.isFinite(d.depth_est_m) ? d.depth_est_m : null;
|
| 1908 |
const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
|
| 1909 |
-
const depthValid = d.depth_valid === true && depthEst !== null;
|
| 1910 |
return {
|
| 1911 |
bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
|
| 1912 |
class: d.label || "object",
|
| 1913 |
score: d.score ?? 0,
|
| 1914 |
-
|
| 1915 |
-
depth_rel: depthRel,
|
| 1916 |
-
depth_valid: depthValid
|
| 1917 |
};
|
| 1918 |
});
|
| 1919 |
}
|
|
@@ -2124,9 +2115,8 @@
|
|
| 2124 |
reqP_kW: null,
|
| 2125 |
maxP_kW: null,
|
| 2126 |
pkill: null,
|
| 2127 |
-
|
| 2128 |
-
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null
|
| 2129 |
-
depth_valid: d.depth_valid === true
|
| 2130 |
};
|
| 2131 |
});
|
| 2132 |
|
|
@@ -2341,17 +2331,8 @@
|
|
| 2341 |
|
| 2342 |
if (det.gpt_distance_m) {
|
| 2343 |
rangeStr = `${det.gpt_distance_m}m (GPT)`;
|
| 2344 |
-
} else if (det.depth_est_m) {
|
| 2345 |
-
rangeStr = `${Math.round(det.depth_est_m)}m (Lidar)`;
|
| 2346 |
-
} else {
|
| 2347 |
-
// Fallback
|
| 2348 |
-
if (det.box) {
|
| 2349 |
-
const [x1, y1, x2, y2] = det.box;
|
| 2350 |
-
const area = ((x2 - x1) * (y2 - y1)) / (state.frame.w * state.frame.h);
|
| 2351 |
-
const est = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
|
| 2352 |
-
rangeStr = `~${Math.round(est)}m (Est)`;
|
| 2353 |
-
}
|
| 2354 |
}
|
|
|
|
| 2355 |
|
| 2356 |
if (det.gpt_direction) {
|
| 2357 |
bearingStr = det.gpt_direction;
|
|
@@ -2626,9 +2607,13 @@
|
|
| 2626 |
baseRange_m: d.baseRange_m || +rangeBase.value,
|
| 2627 |
baseDwell_s: d.baseDwell_s || 4.0,
|
| 2628 |
reqP_kW: d.reqP_kW || 35,
|
| 2629 |
-
|
| 2630 |
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 2631 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2632 |
lastSeen: now(),
|
| 2633 |
vx: 0, vy: 0,
|
| 2634 |
dwellAccum: 0,
|
|
@@ -2660,9 +2645,13 @@
|
|
| 2660 |
baseRange_m: +rangeBase.value,
|
| 2661 |
baseDwell_s: 5.0,
|
| 2662 |
reqP_kW: 40,
|
| 2663 |
-
|
| 2664 |
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 2665 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2666 |
lastSeen: now(),
|
| 2667 |
vx: 0, vy: 0,
|
| 2668 |
dwellAccum: 0,
|
|
@@ -2756,9 +2745,7 @@
|
|
| 2756 |
bbox: normBBox(d.bbox, w, h),
|
| 2757 |
label: d.class,
|
| 2758 |
score: d.score,
|
| 2759 |
-
|
| 2760 |
-
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 2761 |
-
depth_valid: d.depth_valid === true
|
| 2762 |
}));
|
| 2763 |
|
| 2764 |
// mark all tracks as unmatched
|
|
@@ -2797,10 +2784,9 @@
|
|
| 2797 |
|
| 2798 |
tr.label = best.label || tr.label;
|
| 2799 |
tr.score = best.score || tr.score;
|
| 2800 |
-
|
| 2801 |
-
|
| 2802 |
-
tr.depth_rel =
|
| 2803 |
-
tr.depth_valid = true;
|
| 2804 |
}
|
| 2805 |
tr.lastSeen = now();
|
| 2806 |
}
|
|
@@ -2825,9 +2811,13 @@
|
|
| 2825 |
baseRange_m: +rangeBase.value,
|
| 2826 |
baseDwell_s: 5.5,
|
| 2827 |
reqP_kW: 42,
|
| 2828 |
-
|
| 2829 |
depth_rel: detObjs[i].depth_rel,
|
| 2830 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2831 |
lastSeen: now(),
|
| 2832 |
vx: 0, vy: 0,
|
| 2833 |
dwellAccum: 0,
|
|
@@ -2854,12 +2844,14 @@
|
|
| 2854 |
}
|
| 2855 |
|
| 2856 |
function hasValidDepth(item) {
|
| 2857 |
-
|
|
|
|
| 2858 |
}
|
| 2859 |
|
| 2860 |
function getDisplayRange(item, fallbackRange) {
|
| 2861 |
-
|
| 2862 |
-
|
|
|
|
| 2863 |
}
|
| 2864 |
return { range: fallbackRange, source: "area" };
|
| 2865 |
}
|
|
@@ -2934,16 +2926,12 @@
|
|
| 2934 |
}
|
| 2935 |
|
| 2936 |
function getTrackDisplayRange(track) {
|
| 2937 |
-
//
|
| 2938 |
if (track.gpt_distance_m) {
|
| 2939 |
return { range: track.gpt_distance_m, source: "GPT" };
|
| 2940 |
}
|
| 2941 |
-
//
|
| 2942 |
-
|
| 2943 |
-
if (hasValidDepth(track)) {
|
| 2944 |
-
return { range: track.depth_est_m, source: "Lidar" };
|
| 2945 |
-
}
|
| 2946 |
-
return { range: null, source: "Wait..." };
|
| 2947 |
}
|
| 2948 |
|
| 2949 |
function dwellFromRange(track, range_m) {
|
|
@@ -3311,18 +3299,11 @@
|
|
| 3311 |
let rangeVal = 3000; // default max scale in meters
|
| 3312 |
let dist = 1000; // default unknown
|
| 3313 |
|
| 3314 |
-
if (det.gpt_distance_m)
|
| 3315 |
-
|
| 3316 |
-
else
|
| 3317 |
-
//
|
| 3318 |
-
|
| 3319 |
-
|
| 3320 |
-
// We need frame dimensions.
|
| 3321 |
-
const fw = state.frame.w || 1280;
|
| 3322 |
-
const fh = state.frame.h || 720;
|
| 3323 |
-
const area = (det.bbox.w * det.bbox.h) / (fw * fh);
|
| 3324 |
-
// Heuristic: Area 0.01 => ~2000m, Area 0.5 => ~280m
|
| 3325 |
-
dist = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
|
| 3326 |
}
|
| 3327 |
|
| 3328 |
// Log scale or Linear? Linear is easier for users to map.
|
|
|
|
| 989 |
bbox: { x: x1, y: y1, w: w, h: h },
|
| 990 |
aim: { ...ap },
|
| 991 |
features: null,
|
| 992 |
+
baseRange_m: d.gpt_distance_m || null, // GPT is sole source of distance
|
| 993 |
baseAreaFrac: null,
|
| 994 |
baseDwell_s: null,
|
| 995 |
reqP_kW: null,
|
| 996 |
maxP_kW: null,
|
| 997 |
pkill: null,
|
| 998 |
+
// GPT properties - sole source of distance estimation
|
| 999 |
gpt_distance_m: d.gpt_distance_m,
|
| 1000 |
gpt_direction: d.gpt_direction,
|
| 1001 |
gpt_description: d.gpt_description,
|
| 1002 |
+
// Depth visualization only (not for distance)
|
| 1003 |
+
depth_rel: d.depth_rel
|
|
|
|
| 1004 |
};
|
| 1005 |
});
|
| 1006 |
// Update UI components
|
|
|
|
| 1874 |
const y1 = bbox[1] || 0;
|
| 1875 |
const x2 = bbox[2] || 0;
|
| 1876 |
const y2 = bbox[3] || 0;
|
|
|
|
| 1877 |
const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
|
|
|
|
| 1878 |
return {
|
| 1879 |
bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
|
| 1880 |
class: d.label || "drone",
|
| 1881 |
score: d.score ?? 0,
|
| 1882 |
+
depth_rel: depthRel // Visualization only, GPT handles distance
|
|
|
|
|
|
|
| 1883 |
};
|
| 1884 |
});
|
| 1885 |
}
|
|
|
|
| 1899 |
const y1 = bbox[1] || 0;
|
| 1900 |
const x2 = bbox[2] || 0;
|
| 1901 |
const y2 = bbox[3] || 0;
|
|
|
|
| 1902 |
const depthRel = Number.isFinite(d.depth_rel) ? d.depth_rel : null;
|
|
|
|
| 1903 |
return {
|
| 1904 |
bbox: [x1, y1, Math.max(1, x2 - x1), Math.max(1, y2 - y1)],
|
| 1905 |
class: d.label || "object",
|
| 1906 |
score: d.score ?? 0,
|
| 1907 |
+
depth_rel: depthRel // Visualization only, GPT handles distance
|
|
|
|
|
|
|
| 1908 |
};
|
| 1909 |
});
|
| 1910 |
}
|
|
|
|
| 2115 |
reqP_kW: null,
|
| 2116 |
maxP_kW: null,
|
| 2117 |
pkill: null,
|
| 2118 |
+
// Depth visualization only, GPT handles distance
|
| 2119 |
+
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null
|
|
|
|
| 2120 |
};
|
| 2121 |
});
|
| 2122 |
|
|
|
|
| 2331 |
|
| 2332 |
if (det.gpt_distance_m) {
|
| 2333 |
rangeStr = `${det.gpt_distance_m}m (GPT)`;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2334 |
}
|
| 2335 |
+
// No depth_est_m fallback - GPT is the sole source of distance
|
| 2336 |
|
| 2337 |
if (det.gpt_direction) {
|
| 2338 |
bearingStr = det.gpt_direction;
|
|
|
|
| 2607 |
baseRange_m: d.baseRange_m || +rangeBase.value,
|
| 2608 |
baseDwell_s: d.baseDwell_s || 4.0,
|
| 2609 |
reqP_kW: d.reqP_kW || 35,
|
| 2610 |
+
// Depth visualization (keep for depth view toggle)
|
| 2611 |
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 2612 |
+
// GPT properties - the sole source of distance estimation
|
| 2613 |
+
gpt_distance_m: d.gpt_distance_m || null,
|
| 2614 |
+
gpt_direction: d.gpt_direction || null,
|
| 2615 |
+
gpt_description: d.gpt_description || null,
|
| 2616 |
+
// Track state
|
| 2617 |
lastSeen: now(),
|
| 2618 |
vx: 0, vy: 0,
|
| 2619 |
dwellAccum: 0,
|
|
|
|
| 2645 |
baseRange_m: +rangeBase.value,
|
| 2646 |
baseDwell_s: 5.0,
|
| 2647 |
reqP_kW: 40,
|
| 2648 |
+
// Depth visualization only, GPT handles distance
|
| 2649 |
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 2650 |
+
// GPT properties
|
| 2651 |
+
gpt_distance_m: d.gpt_distance_m || null,
|
| 2652 |
+
gpt_direction: d.gpt_direction || null,
|
| 2653 |
+
gpt_description: d.gpt_description || null,
|
| 2654 |
+
// Track state
|
| 2655 |
lastSeen: now(),
|
| 2656 |
vx: 0, vy: 0,
|
| 2657 |
dwellAccum: 0,
|
|
|
|
| 2745 |
bbox: normBBox(d.bbox, w, h),
|
| 2746 |
label: d.class,
|
| 2747 |
score: d.score,
|
| 2748 |
+
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null // Visualization only
|
|
|
|
|
|
|
| 2749 |
}));
|
| 2750 |
|
| 2751 |
// mark all tracks as unmatched
|
|
|
|
| 2784 |
|
| 2785 |
tr.label = best.label || tr.label;
|
| 2786 |
tr.score = best.score || tr.score;
|
| 2787 |
+
// Update depth visualization (not for distance)
|
| 2788 |
+
if (Number.isFinite(best.depth_rel)) {
|
| 2789 |
+
tr.depth_rel = best.depth_rel;
|
|
|
|
| 2790 |
}
|
| 2791 |
tr.lastSeen = now();
|
| 2792 |
}
|
|
|
|
| 2811 |
baseRange_m: +rangeBase.value,
|
| 2812 |
baseDwell_s: 5.5,
|
| 2813 |
reqP_kW: 42,
|
| 2814 |
+
// Depth visualization only, GPT handles distance
|
| 2815 |
depth_rel: detObjs[i].depth_rel,
|
| 2816 |
+
// GPT properties (will be populated by updateTracksWithGPT)
|
| 2817 |
+
gpt_distance_m: null,
|
| 2818 |
+
gpt_direction: null,
|
| 2819 |
+
gpt_description: null,
|
| 2820 |
+
// Track state
|
| 2821 |
lastSeen: now(),
|
| 2822 |
vx: 0, vy: 0,
|
| 2823 |
dwellAccum: 0,
|
|
|
|
| 2844 |
}
|
| 2845 |
|
| 2846 |
function hasValidDepth(item) {
|
| 2847 |
+
// Only used for depth VIEW toggle, not distance
|
| 2848 |
+
return item && Number.isFinite(item.depth_rel);
|
| 2849 |
}
|
| 2850 |
|
| 2851 |
function getDisplayRange(item, fallbackRange) {
|
| 2852 |
+
// GPT is the ONLY source of distance
|
| 2853 |
+
if (item && item.gpt_distance_m) {
|
| 2854 |
+
return { range: item.gpt_distance_m, source: "GPT" };
|
| 2855 |
}
|
| 2856 |
return { range: fallbackRange, source: "area" };
|
| 2857 |
}
|
|
|
|
| 2926 |
}
|
| 2927 |
|
| 2928 |
function getTrackDisplayRange(track) {
|
| 2929 |
+
// GPT is the ONLY source of distance estimation
|
| 2930 |
if (track.gpt_distance_m) {
|
| 2931 |
return { range: track.gpt_distance_m, source: "GPT" };
|
| 2932 |
}
|
| 2933 |
+
// No fallback - return null if GPT hasn't provided distance yet
|
| 2934 |
+
return { range: null, source: null };
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2935 |
}
|
| 2936 |
|
| 2937 |
function dwellFromRange(track, range_m) {
|
|
|
|
| 3299 |
let rangeVal = 3000; // default max scale in meters
|
| 3300 |
let dist = 1000; // default unknown
|
| 3301 |
|
| 3302 |
+
if (det.gpt_distance_m) {
|
| 3303 |
+
dist = det.gpt_distance_m;
|
| 3304 |
+
} else {
|
| 3305 |
+
// No GPT yet - show at far distance (unknown)
|
| 3306 |
+
dist = 3000;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3307 |
}
|
| 3308 |
|
| 3309 |
// Log scale or Linear? Linear is easier for users to map.
|
app.py
CHANGED
|
@@ -58,6 +58,12 @@ from utils.gpt_distance import estimate_distance_gpt
|
|
| 58 |
|
| 59 |
logging.basicConfig(level=logging.INFO)
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
async def _periodic_cleanup() -> None:
|
| 63 |
while True:
|
|
|
|
| 58 |
|
| 59 |
logging.basicConfig(level=logging.INFO)
|
| 60 |
|
| 61 |
+
# Suppress noisy external libraries
|
| 62 |
+
logging.getLogger("httpx").setLevel(logging.WARNING)
|
| 63 |
+
logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
|
| 64 |
+
logging.getLogger("transformers").setLevel(logging.WARNING)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
|
| 68 |
async def _periodic_cleanup() -> None:
|
| 69 |
while True:
|
inference.py
CHANGED
|
@@ -203,9 +203,10 @@ def _attach_depth_metrics(
|
|
| 203 |
frame: np.ndarray,
|
| 204 |
detections: List[Dict[str, Any]],
|
| 205 |
depth_estimator_name: Optional[str],
|
| 206 |
-
depth_scale: float,
|
| 207 |
estimator_instance: Optional[Any] = None,
|
| 208 |
) -> None:
|
|
|
|
| 209 |
if not detections or (not depth_estimator_name and not estimator_instance):
|
| 210 |
return
|
| 211 |
|
|
@@ -222,7 +223,7 @@ def _attach_depth_metrics(
|
|
| 222 |
else:
|
| 223 |
estimator = load_depth_estimator(depth_estimator_name)
|
| 224 |
lock = _get_model_lock("depth", estimator.name)
|
| 225 |
-
|
| 226 |
with lock:
|
| 227 |
depth_result = estimator.predict(frame)
|
| 228 |
|
|
@@ -231,12 +232,10 @@ def _attach_depth_metrics(
|
|
| 231 |
return
|
| 232 |
|
| 233 |
height, width = depth_map.shape[:2]
|
| 234 |
-
|
| 235 |
|
| 236 |
for det in detections:
|
| 237 |
-
det["
|
| 238 |
-
det["depth_rel"] = None
|
| 239 |
-
det["depth_valid"] = False
|
| 240 |
|
| 241 |
bbox = det.get("bbox")
|
| 242 |
if not bbox or len(bbox) < 4:
|
|
@@ -251,13 +250,13 @@ def _attach_depth_metrics(
|
|
| 251 |
patch = depth_map[y1:y2, x1:x2]
|
| 252 |
if patch.size == 0:
|
| 253 |
continue
|
| 254 |
-
|
| 255 |
# Center crop (50%) to avoid background
|
| 256 |
h_p, w_p = patch.shape
|
| 257 |
cy, cx = h_p // 2, w_p // 2
|
| 258 |
dy, dx = h_p // 4, w_p // 4
|
| 259 |
center_patch = patch[cy - dy : cy + dy, cx - dx : cx + dx]
|
| 260 |
-
|
| 261 |
# Fallback to full patch if center is empty (unlikely)
|
| 262 |
if center_patch.size == 0:
|
| 263 |
center_patch = patch
|
|
@@ -267,32 +266,20 @@ def _attach_depth_metrics(
|
|
| 267 |
continue
|
| 268 |
|
| 269 |
depth_raw = float(np.median(finite))
|
| 270 |
-
if depth_raw
|
| 271 |
-
det
|
| 272 |
-
det["depth_valid"] = False
|
| 273 |
-
continue
|
| 274 |
-
|
| 275 |
-
# Inverted depth: closer objects have higher raw values
|
| 276 |
-
# Distance = Scale / RawValue
|
| 277 |
-
try:
|
| 278 |
-
depth_est = depth_scale / depth_raw
|
| 279 |
-
except ZeroDivisionError:
|
| 280 |
-
continue
|
| 281 |
-
|
| 282 |
-
det["depth_est_m"] = depth_est
|
| 283 |
-
det["depth_valid"] = True
|
| 284 |
-
valid_depths.append(depth_est)
|
| 285 |
|
| 286 |
-
if not
|
| 287 |
return
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
| 292 |
|
| 293 |
-
for det in
|
| 294 |
-
|
| 295 |
-
|
| 296 |
|
| 297 |
|
| 298 |
def infer_frame(
|
|
@@ -330,13 +317,13 @@ def infer_frame(
|
|
| 330 |
except Exception:
|
| 331 |
logging.exception("Depth estimation failed for frame")
|
| 332 |
|
| 333 |
-
# Re-build display labels to
|
| 334 |
display_labels = []
|
| 335 |
for i, det in enumerate(detections):
|
| 336 |
label = det["label"]
|
| 337 |
-
if det.get("
|
| 338 |
-
# Add
|
| 339 |
-
depth_str = f"{int(det['
|
| 340 |
label = f"{label} {depth_str}"
|
| 341 |
logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
|
| 342 |
display_labels.append(label)
|
|
@@ -431,23 +418,22 @@ def infer_batch(
|
|
| 431 |
return outputs
|
| 432 |
|
| 433 |
def _build_display_label(det):
|
|
|
|
| 434 |
label = det["label"]
|
| 435 |
-
if det.get("
|
| 436 |
-
|
| 437 |
-
label = f"{label} {depth_str}"
|
| 438 |
return label
|
| 439 |
|
| 440 |
def _attach_depth_from_result(detections, depth_result, depth_scale):
|
|
|
|
| 441 |
depth_map = depth_result.depth_map
|
| 442 |
if depth_map is None or depth_map.size == 0: return
|
| 443 |
-
|
| 444 |
height, width = depth_map.shape[:2]
|
| 445 |
-
|
| 446 |
-
|
| 447 |
for det in detections:
|
| 448 |
-
det["
|
| 449 |
-
det["depth_rel"] = None
|
| 450 |
-
det["depth_valid"] = False
|
| 451 |
|
| 452 |
bbox = det.get("bbox")
|
| 453 |
if not bbox or len(bbox) < 4: continue
|
|
@@ -460,7 +446,7 @@ def _attach_depth_from_result(detections, depth_result, depth_scale):
|
|
| 460 |
|
| 461 |
patch = depth_map[y1:y2, x1:x2]
|
| 462 |
if patch.size == 0: continue
|
| 463 |
-
|
| 464 |
h_p, w_p = patch.shape
|
| 465 |
cy, cx = h_p // 2, w_p // 2
|
| 466 |
dy, dx = h_p // 4, w_p // 4
|
|
@@ -471,29 +457,19 @@ def _attach_depth_from_result(detections, depth_result, depth_scale):
|
|
| 471 |
if finite.size == 0: continue
|
| 472 |
|
| 473 |
depth_raw = float(np.median(finite))
|
| 474 |
-
if depth_raw
|
| 475 |
-
|
| 476 |
-
det["depth_valid"] = False
|
| 477 |
-
continue
|
| 478 |
|
| 479 |
-
|
| 480 |
-
depth_est = depth_scale / depth_raw
|
| 481 |
-
except ZeroDivisionError:
|
| 482 |
-
continue
|
| 483 |
-
|
| 484 |
-
det["depth_est_m"] = depth_est
|
| 485 |
-
det["depth_valid"] = True
|
| 486 |
-
valid_depths.append(depth_est)
|
| 487 |
|
| 488 |
-
|
|
|
|
|
|
|
|
|
|
| 489 |
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
for det in detections:
|
| 495 |
-
if det.get("depth_valid"):
|
| 496 |
-
det["depth_rel"] = (float(det["depth_est_m"]) - min_depth) / denom
|
| 497 |
|
| 498 |
|
| 499 |
def infer_segmentation_frame(
|
|
@@ -593,11 +569,7 @@ def process_first_frame(
|
|
| 593 |
det["gpt_distance_m"] = info.get("distance_m")
|
| 594 |
det["gpt_direction"] = info.get("direction")
|
| 595 |
det["gpt_description"] = info.get("description")
|
| 596 |
-
|
| 597 |
-
# Also populate standard display fields if legacy depth is off or missing
|
| 598 |
-
if not det.get("depth_est_m"):
|
| 599 |
-
det["depth_est_m"] = info.get("distance_m") # Polyfill for UI
|
| 600 |
-
# We might want to distinguish source later
|
| 601 |
|
| 602 |
except Exception as e:
|
| 603 |
logging.error(f"GPT Distance estimation failed: {e}")
|
|
@@ -1272,8 +1244,8 @@ def run_depth_inference(
|
|
| 1272 |
for d in frame_dets:
|
| 1273 |
boxes.append(d.get("bbox"))
|
| 1274 |
lbl = d.get("label", "obj")
|
| 1275 |
-
if d.get("
|
| 1276 |
-
lbl = f"{lbl} {int(d['
|
| 1277 |
labels.append(lbl)
|
| 1278 |
colored = draw_boxes(colored, boxes=boxes, label_names=labels)
|
| 1279 |
|
|
|
|
| 203 |
frame: np.ndarray,
|
| 204 |
detections: List[Dict[str, Any]],
|
| 205 |
depth_estimator_name: Optional[str],
|
| 206 |
+
depth_scale: float, # No longer used for distance calculation
|
| 207 |
estimator_instance: Optional[Any] = None,
|
| 208 |
) -> None:
|
| 209 |
+
"""Attach relative depth values for visualization only. GPT handles distance estimation."""
|
| 210 |
if not detections or (not depth_estimator_name and not estimator_instance):
|
| 211 |
return
|
| 212 |
|
|
|
|
| 223 |
else:
|
| 224 |
estimator = load_depth_estimator(depth_estimator_name)
|
| 225 |
lock = _get_model_lock("depth", estimator.name)
|
| 226 |
+
|
| 227 |
with lock:
|
| 228 |
depth_result = estimator.predict(frame)
|
| 229 |
|
|
|
|
| 232 |
return
|
| 233 |
|
| 234 |
height, width = depth_map.shape[:2]
|
| 235 |
+
raw_depths: List[Tuple[Dict[str, Any], float]] = []
|
| 236 |
|
| 237 |
for det in detections:
|
| 238 |
+
det["depth_rel"] = None # Relative depth for visualization only
|
|
|
|
|
|
|
| 239 |
|
| 240 |
bbox = det.get("bbox")
|
| 241 |
if not bbox or len(bbox) < 4:
|
|
|
|
| 250 |
patch = depth_map[y1:y2, x1:x2]
|
| 251 |
if patch.size == 0:
|
| 252 |
continue
|
| 253 |
+
|
| 254 |
# Center crop (50%) to avoid background
|
| 255 |
h_p, w_p = patch.shape
|
| 256 |
cy, cx = h_p // 2, w_p // 2
|
| 257 |
dy, dx = h_p // 4, w_p // 4
|
| 258 |
center_patch = patch[cy - dy : cy + dy, cx - dx : cx + dx]
|
| 259 |
+
|
| 260 |
# Fallback to full patch if center is empty (unlikely)
|
| 261 |
if center_patch.size == 0:
|
| 262 |
center_patch = patch
|
|
|
|
| 266 |
continue
|
| 267 |
|
| 268 |
depth_raw = float(np.median(finite))
|
| 269 |
+
if depth_raw > 1e-6:
|
| 270 |
+
raw_depths.append((det, depth_raw))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
|
| 272 |
+
if not raw_depths:
|
| 273 |
return
|
| 274 |
|
| 275 |
+
# Compute relative depth (0-1) for visualization only
|
| 276 |
+
all_raw = [d[1] for d in raw_depths]
|
| 277 |
+
min_raw, max_raw = min(all_raw), max(all_raw)
|
| 278 |
+
denom = max(max_raw - min_raw, 1e-6)
|
| 279 |
|
| 280 |
+
for det, depth_raw in raw_depths:
|
| 281 |
+
# Inverted: higher raw = closer = lower rel value (0=close, 1=far)
|
| 282 |
+
det["depth_rel"] = 1.0 - ((depth_raw - min_raw) / denom)
|
| 283 |
|
| 284 |
|
| 285 |
def infer_frame(
|
|
|
|
| 317 |
except Exception:
|
| 318 |
logging.exception("Depth estimation failed for frame")
|
| 319 |
|
| 320 |
+
# Re-build display labels to include GPT distance if available
|
| 321 |
display_labels = []
|
| 322 |
for i, det in enumerate(detections):
|
| 323 |
label = det["label"]
|
| 324 |
+
if det.get("gpt_distance_m") is not None:
|
| 325 |
+
# Add GPT distance to label, e.g. "car 12m"
|
| 326 |
+
depth_str = f"{int(det['gpt_distance_m'])}m"
|
| 327 |
label = f"{label} {depth_str}"
|
| 328 |
logging.debug("Object '%s' at %s (bbox: %s)", label, depth_str, det['bbox'])
|
| 329 |
display_labels.append(label)
|
|
|
|
| 418 |
return outputs
|
| 419 |
|
| 420 |
def _build_display_label(det):
|
| 421 |
+
"""Build display label with GPT distance if available."""
|
| 422 |
label = det["label"]
|
| 423 |
+
if det.get("gpt_distance_m") is not None:
|
| 424 |
+
label = f"{label} {int(det['gpt_distance_m'])}m"
|
|
|
|
| 425 |
return label
|
| 426 |
|
| 427 |
def _attach_depth_from_result(detections, depth_result, depth_scale):
|
| 428 |
+
"""Attach relative depth values for visualization only. GPT handles distance estimation."""
|
| 429 |
depth_map = depth_result.depth_map
|
| 430 |
if depth_map is None or depth_map.size == 0: return
|
| 431 |
+
|
| 432 |
height, width = depth_map.shape[:2]
|
| 433 |
+
raw_depths = []
|
| 434 |
+
|
| 435 |
for det in detections:
|
| 436 |
+
det["depth_rel"] = None # Relative depth for visualization only
|
|
|
|
|
|
|
| 437 |
|
| 438 |
bbox = det.get("bbox")
|
| 439 |
if not bbox or len(bbox) < 4: continue
|
|
|
|
| 446 |
|
| 447 |
patch = depth_map[y1:y2, x1:x2]
|
| 448 |
if patch.size == 0: continue
|
| 449 |
+
|
| 450 |
h_p, w_p = patch.shape
|
| 451 |
cy, cx = h_p // 2, w_p // 2
|
| 452 |
dy, dx = h_p // 4, w_p // 4
|
|
|
|
| 457 |
if finite.size == 0: continue
|
| 458 |
|
| 459 |
depth_raw = float(np.median(finite))
|
| 460 |
+
if depth_raw > 1e-6:
|
| 461 |
+
raw_depths.append((det, depth_raw))
|
|
|
|
|
|
|
| 462 |
|
| 463 |
+
if not raw_depths: return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
|
| 465 |
+
# Compute relative depth (0-1) for visualization only
|
| 466 |
+
all_raw = [d[1] for d in raw_depths]
|
| 467 |
+
min_raw, max_raw = min(all_raw), max(all_raw)
|
| 468 |
+
denom = max(max_raw - min_raw, 1e-6)
|
| 469 |
|
| 470 |
+
for det, depth_raw in raw_depths:
|
| 471 |
+
# Inverted: higher raw = closer = lower rel value (0=close, 1=far)
|
| 472 |
+
det["depth_rel"] = 1.0 - ((depth_raw - min_raw) / denom)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 473 |
|
| 474 |
|
| 475 |
def infer_segmentation_frame(
|
|
|
|
| 569 |
det["gpt_distance_m"] = info.get("distance_m")
|
| 570 |
det["gpt_direction"] = info.get("direction")
|
| 571 |
det["gpt_description"] = info.get("description")
|
| 572 |
+
# GPT is the sole source of distance - no polyfill needed
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
except Exception as e:
|
| 575 |
logging.error(f"GPT Distance estimation failed: {e}")
|
|
|
|
| 1244 |
for d in frame_dets:
|
| 1245 |
boxes.append(d.get("bbox"))
|
| 1246 |
lbl = d.get("label", "obj")
|
| 1247 |
+
if d.get("gpt_distance_m"):
|
| 1248 |
+
lbl = f"{lbl} {int(d['gpt_distance_m'])}m"
|
| 1249 |
labels.append(lbl)
|
| 1250 |
colored = draw_boxes(colored, boxes=boxes, label_names=labels)
|
| 1251 |
|