Zhen Ye
feat: Implement depth-based distance estimation and frontend integration
78d352c
APP.core.tracker = {};

/**
 * Greedy IoU matcher: associates fresh detections with existing tracks,
 * smooths matched tracks in place (EMA on velocity, bbox, and depth),
 * spawns new tracks for large unmatched detections, and prunes tracks
 * not seen within CONFIG.TRACK_PRUNE_MS (killed tracks are retained).
 *
 * @param {Array<Object>} dets  Raw detections: { bbox, class, score,
 *   depth_rel, depth_est_m, depth_valid }.
 * @param {number} dtSec  Seconds since the previous update (velocity scale).
 */
APP.core.tracker.matchAndUpdateTracks = function (dets, dtSec) {
  const { state } = APP.core;
  const { CONFIG } = APP.core;
  const { normBBox, lerp, now, $ } = APP.core.utils;
  const { defaultAimpoint } = APP.core.physics;
  const { log } = APP.ui.logging;
  const videoEngage = $("#videoEngage");
  const rangeBase = $("#rangeBase"); // Fixed Selector
  if (!videoEngage) return;

  // Intersection-over-Union of two {x, y, w, h} boxes; 0 on degenerate union.
  function iou(a, b) {
    const ax2 = a.x + a.w, ay2 = a.y + a.h;
    const bx2 = b.x + b.w, by2 = b.y + b.h;
    const ix1 = Math.max(a.x, b.x), iy1 = Math.max(a.y, b.y);
    const ix2 = Math.min(ax2, bx2), iy2 = Math.min(ay2, by2);
    const iw = Math.max(0, ix2 - ix1), ih = Math.max(0, iy2 - iy1);
    const inter = iw * ih;
    const ua = a.w * a.h + b.w * b.h - inter;
    return ua <= 0 ? 0 : inter / ua;
  }

  // Convert detections to bbox in video coordinates.
  const w = videoEngage.videoWidth || state.frame.w;
  const h = videoEngage.videoHeight || state.frame.h;
  const detObjs = dets.map(d => ({
    bbox: normBBox(d.bbox, w, h),
    label: d.class,
    score: d.score,
    depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
    depth_est_m: d.depth_est_m,
    depth_valid: d.depth_valid
  }));

  // Greedy association: each track claims its best-overlapping unused detection.
  const tracks = state.tracker.tracks;
  const used = new Set();
  for (const tr of tracks) {
    let best = null;
    let bestI = 0.0;
    let bestIdx = -1;
    for (let i = 0; i < detObjs.length; i++) {
      if (used.has(i)) continue;
      const overlap = iou(tr.bbox, detObjs[i].bbox);
      if (overlap > bestI) {
        bestI = overlap;
        best = detObjs[i];
        bestIdx = i;
      }
    }
    // Strict matching threshold
    if (best && bestI >= CONFIG.TRACK_MATCH_THRESHOLD) {
      used.add(bestIdx);
      // Velocity from bbox-center displacement, dt clamped to avoid div-by-~0.
      const cx0 = tr.bbox.x + tr.bbox.w * 0.5;
      const cy0 = tr.bbox.y + tr.bbox.h * 0.5;
      const cx1 = best.bbox.x + best.bbox.w * 0.5;
      const cy1 = best.bbox.y + best.bbox.h * 0.5;
      const rawVx = (cx1 - cx0) / Math.max(1e-3, dtSec);
      const rawVy = (cy1 - cy0) / Math.max(1e-3, dtSec);
      // EMA smoothing: alpha 0.3 = 30% new value, 70% history.
      tr.vx = tr.vx * 0.7 + rawVx * 0.3;
      tr.vy = tr.vy * 0.7 + rawVy * 0.3;
      // Smooth bbox update (position tracks faster than size).
      tr.bbox.x = lerp(tr.bbox.x, best.bbox.x, 0.7);
      tr.bbox.y = lerp(tr.bbox.y, best.bbox.y, 0.7);
      tr.bbox.w = lerp(tr.bbox.w, best.bbox.w, 0.6);
      tr.bbox.h = lerp(tr.bbox.h, best.bbox.h, 0.6);
      // Only replace a "premium" label (drone/uav/missile) with another
      // premium label; anything may overwrite a non-premium label.
      const protectedLabels = ["drone", "uav", "missile"];
      const isProtected = protectedLabels.some(l => (tr.label || "").toLowerCase().includes(l));
      if (!isProtected || (best.label && protectedLabels.some(l => best.label.toLowerCase().includes(l)))) {
        tr.label = best.label || tr.label;
      }
      // FIX: `best.score || tr.score` dropped a legitimate score of 0.
      if (Number.isFinite(best.score)) tr.score = best.score;
      if (Number.isFinite(best.depth_rel)) {
        tr.depth_rel = best.depth_rel;
      }
      // FIX: require a finite estimate before folding it into the EMA —
      // a truthy depth_valid with a missing depth_est_m would otherwise
      // poison the smoothed value with NaN permanently.
      if (best.depth_valid && Number.isFinite(best.depth_est_m)) {
        const newD = best.depth_est_m;
        if (tr.depth_est_m == null) tr.depth_est_m = newD;
        else tr.depth_est_m = tr.depth_est_m * 0.7 + newD * 0.3;
        tr.depth_valid = true;
      }
      tr.lastSeen = now();
    } else {
      // No match this frame: decay velocity toward zero.
      tr.vx *= 0.9;
      tr.vy *= 0.9;
    }
  }

  // Spawn new tracks from unmatched detections, capped at MAX_TRACKS.
  if (tracks.length < CONFIG.MAX_TRACKS) {
    for (let i = 0; i < detObjs.length; i++) {
      if (used.has(i)) continue;
      // Create a new track only if the box covers >= 0.25% of the frame.
      const a = detObjs[i].bbox.w * detObjs[i].bbox.h;
      if (a < (w * h) * 0.0025) continue;
      const newId = `T${String(state.tracker.nextId++).padStart(2, "0")}`;
      const ap = defaultAimpoint(detObjs[i].label);
      // FIX: a non-numeric range input (Number("") is 0, Number("abc") is
      // NaN) must not inject NaN into baseRange_m; fall back to 1000.
      const rangeVal = rangeBase ? Number(rangeBase.value) : NaN;
      tracks.push({
        id: newId,
        label: detObjs[i].label,
        bbox: { ...detObjs[i].bbox },
        score: detObjs[i].score,
        aimRel: { relx: ap.relx, rely: ap.rely, label: ap.label },
        baseAreaFrac: (detObjs[i].bbox.w * detObjs[i].bbox.h) / (w * h),
        baseRange_m: Number.isFinite(rangeVal) ? rangeVal : 1000,
        baseDwell_s: 5.5,
        reqP_kW: 42,
        depth_rel: detObjs[i].depth_rel,
        depth_est_m: detObjs[i].depth_est_m,
        depth_valid: detObjs[i].depth_valid,
        // GPT properties
        gpt_distance_m: null,
        gpt_direction: null,
        gpt_description: null,
        // Track state
        lastSeen: now(),
        vx: 0, vy: 0,
        dwellAccum: 0,
        killed: false,
        state: "TRACK",
        assessT: 0
      });
      log(`New track created: ${newId} (${detObjs[i].label})`, "t");
    }
  }

  // Prune tracks unseen for TRACK_PRUNE_MS; killed tracks are kept
  // (presumably so the UI can still render the kill — confirm upstream).
  const tNow = now();
  state.tracker.tracks = tracks.filter(tr => (tNow - tr.lastSeen) < CONFIG.TRACK_PRUNE_MS || tr.killed);
};
// Polling for backend tracks
/**
 * Poll the backend async job for per-frame tracks and replace the local
 * track list (and state.detections) with the backend's view.
 *
 * Backend shape:  { bbox: [x1, y1, x2, y2] (pixels), label, track_id,
 *                   angle_deg, gpt_distance_m, speed_kph, depth_* }
 * Frontend shape: { id, bbox: {x, y, w, h} normalized to [0, 1], ... }
 *
 * @param {number} frameIdx  Frame index to fetch tracks for.
 */
APP.core.tracker.syncWithBackend = async function (frameIdx) {
  const { state } = APP.core;
  const { $, now } = APP.core.utils;
  const jobId = state.hf.asyncJobId;
  if (!jobId || !state.hf.baseUrl) return;
  try {
    const resp = await fetch(`${state.hf.baseUrl}/detect/tracks/${jobId}/${frameIdx}`);
    if (!resp.ok) return;
    const dets = await resp.json();
    if (!Array.isArray(dets)) return;
    const videoEngage = $("#videoEngage");
    const w = videoEngage ? (videoEngage.videoWidth || state.frame.w) : state.frame.w;
    const h = videoEngage ? (videoEngage.videoHeight || state.frame.h) : state.frame.h;
    const newTracks = dets
      // FIX: skip malformed entries instead of throwing on d.bbox[0].
      .filter(d => Array.isArray(d.bbox) && d.bbox.length >= 4)
      .map(d => {
        const [x1, y1, x2, y2] = d.bbox;
        return {
          id: d.track_id || `T${Math.floor(Math.random() * 1000)}`, // Fallback
          label: d.label,
          // Normalize pixel corners to fractional {x, y, w, h}.
          bbox: { x: x1 / w, y: y1 / h, w: (x2 - x1) / w, h: (y2 - y1) / h },
          score: d.score,
          // FIX: angle_deg / gpt_distance_m were each listed twice;
          // duplicate object keys are dead code (last one wins).
          angle_deg: d.angle_deg,
          gpt_distance_m: d.gpt_distance_m,
          speed_kph: d.speed_kph,
          depth_est_m: d.depth_est_m,
          depth_rel: d.depth_rel,
          depth_valid: d.depth_valid,
          // FIX: give backend tracks a zero velocity so predictTracks
          // does not compute NaN positions from undefined vx/vy.
          vx: 0, vy: 0,
          // FIX: stamp with the same clock the pruner uses (utils.now),
          // not Date.now() — mixing clock sources would break the
          // TRACK_PRUNE_MS comparison in matchAndUpdateTracks.
          lastSeen: now(),
          state: "TRACK"
        };
      });
    // Update state
    state.tracker.tracks = newTracks;
    state.detections = newTracks; // Keep synced
  } catch (e) {
    console.warn("Track sync failed", e);
  }
};
/**
 * Dead-reckon live track boxes between detector updates using their
 * smoothed velocity, clamping positions to the video frame.
 * @param {number} dtSec  Seconds since the last prediction step.
 */
APP.core.tracker.predictTracks = function (dtSec) {
  const { state } = APP.core;
  const { $ } = APP.core.utils;
  const videoEngage = $("#videoEngage");
  if (!videoEngage) return;
  const w = videoEngage.videoWidth || state.frame.w;
  const h = videoEngage.videoHeight || state.frame.h;
  const clamp = (val, min, max) => Math.min(max, Math.max(min, val));
  state.tracker.tracks.forEach(tr => {
    if (tr.killed) return;
    // FIX: tracks installed by syncWithBackend carry no vx/vy; an
    // undefined velocity made `x + vx * dt` NaN, and clamp(NaN, ...)
    // stays NaN — the box silently vanished. Treat missing velocity
    // as stationary.
    const vx = Number.isFinite(tr.vx) ? tr.vx : 0;
    const vy = Number.isFinite(tr.vy) ? tr.vy : 0;
    // 0.12 damping factor keeps dead-reckoning conservative.
    tr.bbox.x = clamp(tr.bbox.x + vx * dtSec * 0.12, 0, w - 1);
    tr.bbox.y = clamp(tr.bbox.y + vy * dtSec * 0.12, 0, h - 1);
  });
};