Spaces:
Sleeping
Sleeping
Zhen Ye
committed on
Commit
·
fbd1770
1
Parent(s):
0eeb0d9
Fix frontend layout and make depth estimation optional backend-wide
Browse files
- LaserPerception/LaserPerception.css +19 -9
- LaserPerception/LaserPerception.html +8 -15
- LaserPerception/LaserPerception.js +6 -128
- app.py +1 -1
- inference.py +29 -28
LaserPerception/LaserPerception.css
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
LaserPerception Design System
|
| 3 |
========================================= */
|
| 4 |
|
| 5 |
-
|
| 6 |
/* --- Colors --- */
|
| 7 |
--bg: #060914;
|
| 8 |
--panel: #0b1026;
|
|
@@ -802,6 +802,23 @@ input[type="number"]:focus {
|
|
| 802 |
color: rgba(255, 255, 255, .78);
|
| 803 |
}
|
| 804 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
.bar {
|
| 806 |
height: 10px;
|
| 807 |
border-radius: 999px;
|
|
@@ -849,11 +866,4 @@ input[type="number"]:focus {
|
|
| 849 |
|
| 850 |
::-webkit-scrollbar-thumb:hover {
|
| 851 |
background: rgba(255, 255, 255, .16);
|
| 852 |
-
}
|
| 853 |
-
|
| 854 |
-
/* Fix video sizing for uploaded files */
|
| 855 |
-
.viewbox canvas,
|
| 856 |
-
.viewbox video {
|
| 857 |
-
object-fit: contain;
|
| 858 |
-
max-height: 60vh;
|
| 859 |
-
}
|
|
|
|
| 2 |
LaserPerception Design System
|
| 3 |
========================================= */
|
| 4 |
|
| 5 |
+
:root {
|
| 6 |
/* --- Colors --- */
|
| 7 |
--bg: #060914;
|
| 8 |
--panel: #0b1026;
|
|
|
|
| 802 |
color: rgba(255, 255, 255, .78);
|
| 803 |
}
|
| 804 |
|
| 805 |
+
/* Sidebar Checkbox Row */
|
| 806 |
+
.checkbox-row {
|
| 807 |
+
grid-column: span 2;
|
| 808 |
+
margin-top: 8px;
|
| 809 |
+
border-top: 1px solid var(--stroke2);
|
| 810 |
+
padding-top: 8px;
|
| 811 |
+
display: flex;
|
| 812 |
+
align-items: center;
|
| 813 |
+
gap: 8px;
|
| 814 |
+
cursor: pointer;
|
| 815 |
+
}
|
| 816 |
+
|
| 817 |
+
.checkbox-row input[type="checkbox"] {
|
| 818 |
+
width: auto;
|
| 819 |
+
margin: 0;
|
| 820 |
+
}
|
| 821 |
+
|
| 822 |
.bar {
|
| 823 |
height: 10px;
|
| 824 |
border-radius: 999px;
|
|
|
|
| 866 |
|
| 867 |
::-webkit-scrollbar-thumb:hover {
|
| 868 |
background: rgba(255, 255, 255, .16);
|
| 869 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
LaserPerception/LaserPerception.html
CHANGED
|
@@ -83,7 +83,6 @@
|
|
| 83 |
</optgroup>
|
| 84 |
</select>
|
| 85 |
</div>
|
| 86 |
-
</div>
|
| 87 |
<div>
|
| 88 |
<label>Tracking</label>
|
| 89 |
<select id="trackerSelect">
|
|
@@ -91,12 +90,11 @@
|
|
| 91 |
<option value="external">External hook (user API)</option>
|
| 92 |
</select>
|
| 93 |
</div>
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
</div>
|
| 100 |
</div>
|
| 101 |
|
| 102 |
<div class="hint mt-sm" id="detectorHint">
|
|
@@ -300,7 +298,8 @@
|
|
| 300 |
</div>
|
| 301 |
|
| 302 |
<div class="strip mt-md">
|
| 303 |
-
<span class="chip" id="chipFrameDepth"
|
|
|
|
| 304 |
</div>
|
| 305 |
</div>
|
| 306 |
|
|
@@ -313,12 +312,6 @@
|
|
| 313 |
Mission classes: <span class="kbd" id="missionClasses">—</span>
|
| 314 |
<div class="mini" id="missionId">Mission: —</div>
|
| 315 |
</div>
|
| 316 |
-
|
| 317 |
-
<!-- NEW Radar Map for Tab 1 -->
|
| 318 |
-
<div class="radar-view" style="height: 220px; margin: 10px 0; background: rgba(0,0,0,0.3); border-radius: 12px; border: 1px solid var(--stroke);">
|
| 319 |
-
<canvas id="radarCanvas1" width="400" height="220" style="width:100%; height:100%; display:block;"></canvas>
|
| 320 |
-
</div>
|
| 321 |
-
|
| 322 |
<div class="list" id="objList"></div>
|
| 323 |
</div>
|
| 324 |
|
|
@@ -545,4 +538,4 @@
|
|
| 545 |
|
| 546 |
</body>
|
| 547 |
|
| 548 |
-
</html>
|
|
|
|
| 83 |
</optgroup>
|
| 84 |
</select>
|
| 85 |
</div>
|
|
|
|
| 86 |
<div>
|
| 87 |
<label>Tracking</label>
|
| 88 |
<select id="trackerSelect">
|
|
|
|
| 90 |
<option value="external">External hook (user API)</option>
|
| 91 |
</select>
|
| 92 |
</div>
|
| 93 |
+
|
| 94 |
+
<label class="checkbox-row" for="enableDepthToggle">
|
| 95 |
+
<input type="checkbox" id="enableDepthToggle">
|
| 96 |
+
<span>Enable Legacy Depth Map (Slow)</span>
|
| 97 |
+
</label>
|
|
|
|
| 98 |
</div>
|
| 99 |
|
| 100 |
<div class="hint mt-sm" id="detectorHint">
|
|
|
|
| 298 |
</div>
|
| 299 |
|
| 300 |
<div class="strip mt-md">
|
| 301 |
+
<span class="chip" id="chipFrameDepth"
|
| 302 |
+
title="Toggle depth view of first frame (if available)">VIEW:DEFAULT</span>
|
| 303 |
</div>
|
| 304 |
</div>
|
| 305 |
|
|
|
|
| 312 |
Mission classes: <span class="kbd" id="missionClasses">—</span>
|
| 313 |
<div class="mini" id="missionId">Mission: —</div>
|
| 314 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
<div class="list" id="objList"></div>
|
| 316 |
</div>
|
| 317 |
|
|
|
|
| 538 |
|
| 539 |
</body>
|
| 540 |
|
| 541 |
+
</html>
|
LaserPerception/LaserPerception.js
CHANGED
|
@@ -138,7 +138,6 @@
|
|
| 138 |
|
| 139 |
const frameCanvas = $("#frameCanvas");
|
| 140 |
const frameOverlay = $("#frameOverlay");
|
| 141 |
-
const radarCanvas1 = $("#radarCanvas1"); // New Radar Map
|
| 142 |
const frameEmpty = $("#frameEmpty");
|
| 143 |
const frameNote = $("#frameNote");
|
| 144 |
|
|
@@ -205,7 +204,6 @@
|
|
| 205 |
const rMin = $("#rMin");
|
| 206 |
const rMax = $("#rMax");
|
| 207 |
const showPk = $("#showPk");
|
| 208 |
-
const enableDepthToggle = $("#enableDepthToggle"); // Toggle
|
| 209 |
const btnReplot = $("#btnReplot");
|
| 210 |
const btnSnap = $("#btnSnap");
|
| 211 |
|
|
@@ -874,8 +872,10 @@
|
|
| 874 |
}
|
| 875 |
// drone_detection uses drone_yolo automatically
|
| 876 |
|
| 877 |
-
// Add depth_estimator parameter for depth processing
|
|
|
|
| 878 |
const useLegacyDepth = enableDepthToggle && enableDepthToggle.checked;
|
|
|
|
| 879 |
form.append("depth_estimator", useLegacyDepth ? "depth" : "");
|
| 880 |
form.append("enable_depth", useLegacyDepth ? "true" : "false");
|
| 881 |
|
|
@@ -1982,14 +1982,9 @@
|
|
| 1982 |
reqP_kW: null,
|
| 1983 |
maxP_kW: null,
|
| 1984 |
pkill: null,
|
| 1985 |
-
|
| 1986 |
-
gpt_distance_m: d.gpt_distance_m || null,
|
| 1987 |
-
gpt_direction: d.gpt_direction || null,
|
| 1988 |
-
gpt_description: d.gpt_description || null,
|
| 1989 |
-
// Legacy Depth
|
| 1990 |
-
depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : (d.gpt_distance_m || null), // Fallback to GPT
|
| 1991 |
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 1992 |
-
depth_valid: d.depth_valid === true
|
| 1993 |
};
|
| 1994 |
});
|
| 1995 |
|
|
@@ -2029,7 +2024,6 @@
|
|
| 2029 |
state.selectedId = state.detections[0]?.id || null;
|
| 2030 |
renderObjectList();
|
| 2031 |
renderFrameOverlay();
|
| 2032 |
-
renderRadarTab1(); // New Radar Render
|
| 2033 |
renderSummary();
|
| 2034 |
renderFeatures(getSelected());
|
| 2035 |
renderTrade();
|
|
@@ -2654,26 +2648,10 @@
|
|
| 2654 |
|
| 2655 |
tr.label = best.label || tr.label;
|
| 2656 |
tr.score = best.score || tr.score;
|
| 2657 |
-
|
| 2658 |
-
// Depth smoothing with hysteresis
|
| 2659 |
if (best.depth_valid && Number.isFinite(best.depth_est_m)) {
|
| 2660 |
-
|
| 2661 |
-
if (tr.depth_est_m == null) {
|
| 2662 |
-
tr.depth_est_m = best.depth_est_m;
|
| 2663 |
-
} else {
|
| 2664 |
-
tr.depth_est_m = lerp(tr.depth_est_m, best.depth_est_m, 0.35);
|
| 2665 |
-
}
|
| 2666 |
tr.depth_rel = Number.isFinite(best.depth_rel) ? best.depth_rel : tr.depth_rel;
|
| 2667 |
tr.depth_valid = true;
|
| 2668 |
-
tr.lastDepthTime = now();
|
| 2669 |
-
} else {
|
| 2670 |
-
// Hysteresis: hold last valid depth for 0.8s
|
| 2671 |
-
if (tr.lastDepthTime && (now() - tr.lastDepthTime) < 800) {
|
| 2672 |
-
// keep existing tr.depth_est_m
|
| 2673 |
-
} else {
|
| 2674 |
-
tr.depth_valid = false;
|
| 2675 |
-
tr.depth_est_m = null; // fallback to area
|
| 2676 |
-
}
|
| 2677 |
}
|
| 2678 |
tr.lastSeen = now();
|
| 2679 |
}
|
|
@@ -3122,106 +3100,6 @@
|
|
| 3122 |
ctx.fillText("BLIPS: DEPTH RELATIVE RANGE + BEARING (area fallback)", 10, 36);
|
| 3123 |
}
|
| 3124 |
|
| 3125 |
-
// ========= Radar Tab 1 (GPT-based) =========
|
| 3126 |
-
function renderRadarTab1() {
|
| 3127 |
-
if (!radarCanvas1) return;
|
| 3128 |
-
const ctx = radarCanvas1.getContext("2d");
|
| 3129 |
-
const rect = radarCanvas1.getBoundingClientRect();
|
| 3130 |
-
const dpr = devicePixelRatio || 1;
|
| 3131 |
-
const targetW = Math.max(1, Math.floor(rect.width * dpr));
|
| 3132 |
-
const targetH = Math.max(1, Math.floor(rect.height * dpr));
|
| 3133 |
-
if (radarCanvas1.width !== targetW || radarCanvas1.height !== targetH) {
|
| 3134 |
-
radarCanvas1.width = targetW;
|
| 3135 |
-
radarCanvas1.height = targetH;
|
| 3136 |
-
}
|
| 3137 |
-
const w = radarCanvas1.width, h = radarCanvas1.height;
|
| 3138 |
-
ctx.clearRect(0, 0, w, h);
|
| 3139 |
-
|
| 3140 |
-
// background
|
| 3141 |
-
ctx.fillStyle = "rgba(0,0,0,.35)";
|
| 3142 |
-
ctx.fillRect(0, 0, w, h);
|
| 3143 |
-
|
| 3144 |
-
const cx = w * 0.5, cy = h * 0.5;
|
| 3145 |
-
const R = Math.min(w, h) * 0.42;
|
| 3146 |
-
|
| 3147 |
-
// rings
|
| 3148 |
-
ctx.strokeStyle = "rgba(255,255,255,.10)";
|
| 3149 |
-
ctx.lineWidth = 1;
|
| 3150 |
-
for (let i = 1; i <= 4; i++) {
|
| 3151 |
-
ctx.beginPath();
|
| 3152 |
-
ctx.arc(cx, cy, R * i / 4, 0, Math.PI * 2);
|
| 3153 |
-
ctx.stroke();
|
| 3154 |
-
}
|
| 3155 |
-
// cross
|
| 3156 |
-
ctx.beginPath(); ctx.moveTo(cx - R, cy); ctx.lineTo(cx + R, cy); ctx.stroke();
|
| 3157 |
-
ctx.beginPath(); ctx.moveTo(cx, cy - R); ctx.lineTo(cx, cy + R); ctx.stroke();
|
| 3158 |
-
|
| 3159 |
-
// ownship
|
| 3160 |
-
ctx.fillStyle = "rgba(34,211,238,.85)";
|
| 3161 |
-
ctx.beginPath();
|
| 3162 |
-
ctx.arc(cx, cy, 5, 0, Math.PI * 2);
|
| 3163 |
-
ctx.fill();
|
| 3164 |
-
|
| 3165 |
-
if (!state.detections.length) {
|
| 3166 |
-
ctx.fillStyle = "rgba(255,255,255,.4)";
|
| 3167 |
-
ctx.fillText("No detections", 10, 20);
|
| 3168 |
-
return;
|
| 3169 |
-
}
|
| 3170 |
-
|
| 3171 |
-
// Draw items
|
| 3172 |
-
// Find max range to scale
|
| 3173 |
-
const ranges = state.detections.map(d => d.gpt_distance_m || d.depth_est_m || 200).filter(v => v);
|
| 3174 |
-
const maxR = Math.max(200, ...ranges);
|
| 3175 |
-
|
| 3176 |
-
state.detections.forEach(d => {
|
| 3177 |
-
const dist = d.gpt_distance_m || d.depth_est_m || 50;
|
| 3178 |
-
const dirStr = d.gpt_direction || "12 o'clock";
|
| 3179 |
-
|
| 3180 |
-
// Parse clock direction
|
| 3181 |
-
let angle = -Math.PI / 2; // Default Top
|
| 3182 |
-
const match = String(dirStr).match(/(\d+)/);
|
| 3183 |
-
if (match) {
|
| 3184 |
-
let hour = parseInt(match[1]);
|
| 3185 |
-
if (hour === 12) hour = 0;
|
| 3186 |
-
angle = -Math.PI / 2 + (hour / 12) * (Math.PI * 2);
|
| 3187 |
-
}
|
| 3188 |
-
|
| 3189 |
-
// Normalize range
|
| 3190 |
-
const rNorm = clamp(dist / maxR, 0.1, 1.0) * R;
|
| 3191 |
-
|
| 3192 |
-
const px = cx + Math.cos(angle) * rNorm;
|
| 3193 |
-
const py = cy + Math.sin(angle) * rNorm;
|
| 3194 |
-
|
| 3195 |
-
const isSel = d.id === state.selectedId;
|
| 3196 |
-
|
| 3197 |
-
// Blip
|
| 3198 |
-
ctx.fillStyle = isSel ? "rgba(34,211,238,.95)" : "rgba(124,58,237,.8)";
|
| 3199 |
-
ctx.beginPath();
|
| 3200 |
-
ctx.arc(px, py, isSel ? 6 : 4, 0, Math.PI * 2);
|
| 3201 |
-
ctx.fill();
|
| 3202 |
-
|
| 3203 |
-
// Label
|
| 3204 |
-
ctx.fillStyle = "rgba(255,255,255,.8)";
|
| 3205 |
-
ctx.font = "11px monospace";
|
| 3206 |
-
ctx.fillText(d.id, px + 8, py + 4);
|
| 3207 |
-
|
| 3208 |
-
// Interaction (simple hit test logic needs inverse transform if we had click handler here)
|
| 3209 |
-
// We reuse objList click for selection, which updates this map.
|
| 3210 |
-
});
|
| 3211 |
-
|
| 3212 |
-
// Add click listener to canvas is tricky without refactoring.
|
| 3213 |
-
// We rely on ObjList and Main Canvas for selection currently.
|
| 3214 |
-
// But user asked to click on map.
|
| 3215 |
-
// I'll add a simple click handler on `radarCanvas1` element in setup if possible.
|
| 3216 |
-
// Or inline here:
|
| 3217 |
-
if (!radarCanvas1._clickAttached) {
|
| 3218 |
-
radarCanvas1._clickAttached = true;
|
| 3219 |
-
$(radarCanvas1).on("click", (e) => {
|
| 3220 |
-
// scale logic... omitted for brevity/risk, user can select via list/main view
|
| 3221 |
-
});
|
| 3222 |
-
}
|
| 3223 |
-
}
|
| 3224 |
-
|
| 3225 |
// ========= Resizing overlays to match video viewports =========
|
| 3226 |
function resizeOverlays() {
|
| 3227 |
// Engage overlay matches displayed video size
|
|
|
|
| 138 |
|
| 139 |
const frameCanvas = $("#frameCanvas");
|
| 140 |
const frameOverlay = $("#frameOverlay");
|
|
|
|
| 141 |
const frameEmpty = $("#frameEmpty");
|
| 142 |
const frameNote = $("#frameNote");
|
| 143 |
|
|
|
|
| 204 |
const rMin = $("#rMin");
|
| 205 |
const rMax = $("#rMax");
|
| 206 |
const showPk = $("#showPk");
|
|
|
|
| 207 |
const btnReplot = $("#btnReplot");
|
| 208 |
const btnSnap = $("#btnSnap");
|
| 209 |
|
|
|
|
| 872 |
}
|
| 873 |
// drone_detection uses drone_yolo automatically
|
| 874 |
|
| 875 |
+
// Add depth_estimator parameter for depth processing
|
| 876 |
+
const enableDepthToggle = document.getElementById("enableDepthToggle");
|
| 877 |
const useLegacyDepth = enableDepthToggle && enableDepthToggle.checked;
|
| 878 |
+
|
| 879 |
form.append("depth_estimator", useLegacyDepth ? "depth" : "");
|
| 880 |
form.append("enable_depth", useLegacyDepth ? "true" : "false");
|
| 881 |
|
|
|
|
| 1982 |
reqP_kW: null,
|
| 1983 |
maxP_kW: null,
|
| 1984 |
pkill: null,
|
| 1985 |
+
depth_est_m: Number.isFinite(d.depth_est_m) ? d.depth_est_m : null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1986 |
depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
|
| 1987 |
+
depth_valid: d.depth_valid === true
|
| 1988 |
};
|
| 1989 |
});
|
| 1990 |
|
|
|
|
| 2024 |
state.selectedId = state.detections[0]?.id || null;
|
| 2025 |
renderObjectList();
|
| 2026 |
renderFrameOverlay();
|
|
|
|
| 2027 |
renderSummary();
|
| 2028 |
renderFeatures(getSelected());
|
| 2029 |
renderTrade();
|
|
|
|
| 2648 |
|
| 2649 |
tr.label = best.label || tr.label;
|
| 2650 |
tr.score = best.score || tr.score;
|
|
|
|
|
|
|
| 2651 |
if (best.depth_valid && Number.isFinite(best.depth_est_m)) {
|
| 2652 |
+
tr.depth_est_m = best.depth_est_m;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2653 |
tr.depth_rel = Number.isFinite(best.depth_rel) ? best.depth_rel : tr.depth_rel;
|
| 2654 |
tr.depth_valid = true;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2655 |
}
|
| 2656 |
tr.lastSeen = now();
|
| 2657 |
}
|
|
|
|
| 3100 |
ctx.fillText("BLIPS: DEPTH RELATIVE RANGE + BEARING (area fallback)", 10, 36);
|
| 3101 |
}
|
| 3102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3103 |
// ========= Resizing overlays to match video viewports =========
|
| 3104 |
function resizeOverlays() {
|
| 3105 |
// Engage overlay matches displayed video size
|
app.py
CHANGED
|
@@ -368,7 +368,7 @@ async def detect_async_endpoint(
|
|
| 368 |
output_video_path=str(output_path),
|
| 369 |
first_frame_path=str(first_frame_path),
|
| 370 |
first_frame_detections=detections,
|
| 371 |
-
depth_estimator_name=
|
| 372 |
depth_scale=float(depth_scale),
|
| 373 |
depth_output_path=str(depth_output_path),
|
| 374 |
first_frame_depth_path=str(first_frame_depth_path),
|
|
|
|
| 368 |
output_video_path=str(output_path),
|
| 369 |
first_frame_path=str(first_frame_path),
|
| 370 |
first_frame_detections=detections,
|
| 371 |
+
depth_estimator_name=active_depth,
|
| 372 |
depth_scale=float(depth_scale),
|
| 373 |
depth_output_path=str(depth_output_path),
|
| 374 |
first_frame_depth_path=str(first_frame_depth_path),
|
inference.py
CHANGED
|
@@ -426,34 +426,35 @@ def process_first_frame(
|
|
| 426 |
_DEPTH_SCALE if depth_scale is None else depth_scale,
|
| 427 |
)
|
| 428 |
|
| 429 |
-
# 2. GPT-based Distance/Direction Estimation (
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
|
|
|
| 457 |
|
| 458 |
return processed, detections
|
| 459 |
|
|
|
|
| 426 |
_DEPTH_SCALE if depth_scale is None else depth_scale,
|
| 427 |
)
|
| 428 |
|
| 429 |
+
# 2. GPT-based Distance/Direction Estimation (Now gated by enable_depth_estimator to prevent "depth" appearing when unwanted)
|
| 430 |
+
if enable_depth_estimator:
|
| 431 |
+
# We need to save the frame temporarily to pass to GPT (or refactor gpt_distance to take buffer)
|
| 432 |
+
# For now, write to temp file
|
| 433 |
+
try:
|
| 434 |
+
with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
|
| 435 |
+
cv2.imwrite(tmp_img.name, frame)
|
| 436 |
+
gpt_results = estimate_distance_gpt(tmp_img.name, detections)
|
| 437 |
+
os.remove(tmp_img.name) # Clean up immediately
|
| 438 |
+
|
| 439 |
+
# Merge GPT results into detections
|
| 440 |
+
# GPT returns { "T01": { "distance_m": ..., "direction": ... } }
|
| 441 |
+
# Detections are list of dicts. We assume T01 maps to index 0, T02 to index 1...
|
| 442 |
+
for i, det in enumerate(detections):
|
| 443 |
+
# ID format matches what we constructed in gpt_distance.py
|
| 444 |
+
obj_id = f"T{str(i+1).zfill(2)}"
|
| 445 |
+
if obj_id in gpt_results:
|
| 446 |
+
info = gpt_results[obj_id]
|
| 447 |
+
det["gpt_distance_m"] = info.get("distance_m")
|
| 448 |
+
det["gpt_direction"] = info.get("direction")
|
| 449 |
+
det["gpt_description"] = info.get("description")
|
| 450 |
+
|
| 451 |
+
# Also populate standard display fields if legacy depth is off or missing
|
| 452 |
+
if not det.get("depth_est_m"):
|
| 453 |
+
det["depth_est_m"] = info.get("distance_m") # Polyfill for UI
|
| 454 |
+
# We might want to distinguish source later
|
| 455 |
+
|
| 456 |
+
except Exception as e:
|
| 457 |
+
logging.error(f"GPT Distance estimation failed: {e}")
|
| 458 |
|
| 459 |
return processed, detections
|
| 460 |
|