Zhen Ye
feat: Implement depth-based distance estimation and frontend integration
78d352c
APP.core.tracker = {};

/**
 * Greedy IoU matcher: associates fresh detections with existing tracks,
 * smooths matched tracks in place (EMA on velocity, bbox, and depth),
 * spawns new tracks for large unmatched detections, and prunes tracks
 * not seen within CONFIG.TRACK_PRUNE_MS (killed tracks are retained).
 *
 * @param {Array<Object>} dets  Raw detections: { bbox, class, score,
 *   depth_rel, depth_est_m, depth_valid }.
 * @param {number} dtSec  Seconds since the previous update (velocity scale).
 */
APP.core.tracker.matchAndUpdateTracks = function (dets, dtSec) {
  const { state } = APP.core;
  const { CONFIG } = APP.core;
  const { normBBox, lerp, now, $ } = APP.core.utils;
  const { defaultAimpoint } = APP.core.physics;
  const { log } = APP.ui.logging;
  const videoEngage = $("#videoEngage");
  const rangeBase = $("#rangeBase"); // Fixed Selector
  if (!videoEngage) return;

  // Intersection-over-Union of two {x, y, w, h} boxes; 0 on degenerate union.
  function iou(a, b) {
    const ax2 = a.x + a.w, ay2 = a.y + a.h;
    const bx2 = b.x + b.w, by2 = b.y + b.h;
    const ix1 = Math.max(a.x, b.x), iy1 = Math.max(a.y, b.y);
    const ix2 = Math.min(ax2, bx2), iy2 = Math.min(ay2, by2);
    const iw = Math.max(0, ix2 - ix1), ih = Math.max(0, iy2 - iy1);
    const inter = iw * ih;
    const ua = a.w * a.h + b.w * b.h - inter;
    return ua <= 0 ? 0 : inter / ua;
  }

  // Convert detections to bbox in video coordinates.
  const w = videoEngage.videoWidth || state.frame.w;
  const h = videoEngage.videoHeight || state.frame.h;
  const detObjs = dets.map(d => ({
    bbox: normBBox(d.bbox, w, h),
    label: d.class,
    score: d.score,
    depth_rel: Number.isFinite(d.depth_rel) ? d.depth_rel : null,
    depth_est_m: d.depth_est_m,
    depth_valid: d.depth_valid
  }));

  // Greedy association: each track claims its best-overlapping unused detection.
  const tracks = state.tracker.tracks;
  const used = new Set();
  for (const tr of tracks) {
    let best = null;
    let bestI = 0.0;
    let bestIdx = -1;
    for (let i = 0; i < detObjs.length; i++) {
      if (used.has(i)) continue;
      const overlap = iou(tr.bbox, detObjs[i].bbox);
      if (overlap > bestI) {
        bestI = overlap;
        best = detObjs[i];
        bestIdx = i;
      }
    }
    // Strict matching threshold
    if (best && bestI >= CONFIG.TRACK_MATCH_THRESHOLD) {
      used.add(bestIdx);
      // Velocity from bbox-center displacement, dt clamped to avoid div-by-~0.
      const cx0 = tr.bbox.x + tr.bbox.w * 0.5;
      const cy0 = tr.bbox.y + tr.bbox.h * 0.5;
      const cx1 = best.bbox.x + best.bbox.w * 0.5;
      const cy1 = best.bbox.y + best.bbox.h * 0.5;
      const rawVx = (cx1 - cx0) / Math.max(1e-3, dtSec);
      const rawVy = (cy1 - cy0) / Math.max(1e-3, dtSec);
      // EMA smoothing: alpha 0.3 = 30% new value, 70% history.
      tr.vx = tr.vx * 0.7 + rawVx * 0.3;
      tr.vy = tr.vy * 0.7 + rawVy * 0.3;
      // Smooth bbox update (position tracks faster than size).
      tr.bbox.x = lerp(tr.bbox.x, best.bbox.x, 0.7);
      tr.bbox.y = lerp(tr.bbox.y, best.bbox.y, 0.7);
      tr.bbox.w = lerp(tr.bbox.w, best.bbox.w, 0.6);
      tr.bbox.h = lerp(tr.bbox.h, best.bbox.h, 0.6);
      // Only replace a "premium" label (drone/uav/missile) with another
      // premium label; anything may overwrite a non-premium label.
      const protectedLabels = ["drone", "uav", "missile"];
      const isProtected = protectedLabels.some(l => (tr.label || "").toLowerCase().includes(l));
      if (!isProtected || (best.label && protectedLabels.some(l => best.label.toLowerCase().includes(l)))) {
        tr.label = best.label || tr.label;
      }
      // FIX: `best.score || tr.score` dropped a legitimate score of 0.
      if (Number.isFinite(best.score)) tr.score = best.score;
      if (Number.isFinite(best.depth_rel)) {
        tr.depth_rel = best.depth_rel;
      }
      // FIX: require a finite estimate before folding it into the EMA —
      // a truthy depth_valid with a missing depth_est_m would otherwise
      // poison the smoothed value with NaN permanently.
      if (best.depth_valid && Number.isFinite(best.depth_est_m)) {
        const newD = best.depth_est_m;
        if (tr.depth_est_m == null) tr.depth_est_m = newD;
        else tr.depth_est_m = tr.depth_est_m * 0.7 + newD * 0.3;
        tr.depth_valid = true;
      }
      tr.lastSeen = now();
    } else {
      // No match this frame: decay velocity toward zero.
      tr.vx *= 0.9;
      tr.vy *= 0.9;
    }
  }

  // Spawn new tracks from unmatched detections, capped at MAX_TRACKS.
  if (tracks.length < CONFIG.MAX_TRACKS) {
    for (let i = 0; i < detObjs.length; i++) {
      if (used.has(i)) continue;
      // Create a new track only if the box covers >= 0.25% of the frame.
      const a = detObjs[i].bbox.w * detObjs[i].bbox.h;
      if (a < (w * h) * 0.0025) continue;
      const newId = `T${String(state.tracker.nextId++).padStart(2, "0")}`;
      const ap = defaultAimpoint(detObjs[i].label);
      // FIX: a non-numeric range input (Number("") is 0, Number("abc") is
      // NaN) must not inject NaN into baseRange_m; fall back to 1000.
      const rangeVal = rangeBase ? Number(rangeBase.value) : NaN;
      tracks.push({
        id: newId,
        label: detObjs[i].label,
        bbox: { ...detObjs[i].bbox },
        score: detObjs[i].score,
        aimRel: { relx: ap.relx, rely: ap.rely, label: ap.label },
        baseAreaFrac: (detObjs[i].bbox.w * detObjs[i].bbox.h) / (w * h),
        baseRange_m: Number.isFinite(rangeVal) ? rangeVal : 1000,
        baseDwell_s: 5.5,
        reqP_kW: 42,
        depth_rel: detObjs[i].depth_rel,
        depth_est_m: detObjs[i].depth_est_m,
        depth_valid: detObjs[i].depth_valid,
        // GPT properties
        gpt_distance_m: null,
        gpt_direction: null,
        gpt_description: null,
        // Track state
        lastSeen: now(),
        vx: 0, vy: 0,
        dwellAccum: 0,
        killed: false,
        state: "TRACK",
        assessT: 0
      });
      log(`New track created: ${newId} (${detObjs[i].label})`, "t");
    }
  }

  // Prune tracks unseen for TRACK_PRUNE_MS; killed tracks are kept
  // (presumably so the UI can still render the kill — confirm upstream).
  const tNow = now();
  state.tracker.tracks = tracks.filter(tr => (tNow - tr.lastSeen) < CONFIG.TRACK_PRUNE_MS || tr.killed);
};
// Polling for backend tracks
/**
 * Poll the backend async job for per-frame tracks and replace the local
 * track list (and state.detections) with the backend's view.
 *
 * Backend shape:  { bbox: [x1, y1, x2, y2] (pixels), label, track_id,
 *                   angle_deg, gpt_distance_m, speed_kph, depth_* }
 * Frontend shape: { id, bbox: {x, y, w, h} normalized to [0, 1], ... }
 *
 * @param {number} frameIdx  Frame index to fetch tracks for.
 */
APP.core.tracker.syncWithBackend = async function (frameIdx) {
  const { state } = APP.core;
  const { $, now } = APP.core.utils;
  const jobId = state.hf.asyncJobId;
  if (!jobId || !state.hf.baseUrl) return;
  try {
    const resp = await fetch(`${state.hf.baseUrl}/detect/tracks/${jobId}/${frameIdx}`);
    if (!resp.ok) return;
    const dets = await resp.json();
    if (!Array.isArray(dets)) return;
    const videoEngage = $("#videoEngage");
    const w = videoEngage ? (videoEngage.videoWidth || state.frame.w) : state.frame.w;
    const h = videoEngage ? (videoEngage.videoHeight || state.frame.h) : state.frame.h;
    const newTracks = dets
      // FIX: skip malformed entries instead of throwing on d.bbox[0].
      .filter(d => Array.isArray(d.bbox) && d.bbox.length >= 4)
      .map(d => {
        const [x1, y1, x2, y2] = d.bbox;
        return {
          id: d.track_id || `T${Math.floor(Math.random() * 1000)}`, // Fallback
          label: d.label,
          // Normalize pixel corners to fractional {x, y, w, h}.
          bbox: { x: x1 / w, y: y1 / h, w: (x2 - x1) / w, h: (y2 - y1) / h },
          score: d.score,
          // FIX: angle_deg / gpt_distance_m were each listed twice;
          // duplicate object keys are dead code (last one wins).
          angle_deg: d.angle_deg,
          gpt_distance_m: d.gpt_distance_m,
          speed_kph: d.speed_kph,
          depth_est_m: d.depth_est_m,
          depth_rel: d.depth_rel,
          depth_valid: d.depth_valid,
          // FIX: give backend tracks a zero velocity so predictTracks
          // does not compute NaN positions from undefined vx/vy.
          vx: 0, vy: 0,
          // FIX: stamp with the same clock the pruner uses (utils.now),
          // not Date.now() — mixing clock sources would break the
          // TRACK_PRUNE_MS comparison in matchAndUpdateTracks.
          lastSeen: now(),
          state: "TRACK"
        };
      });
    // Update state
    state.tracker.tracks = newTracks;
    state.detections = newTracks; // Keep synced
  } catch (e) {
    console.warn("Track sync failed", e);
  }
};
/**
 * Dead-reckon live track boxes between detector updates using their
 * smoothed velocity, clamping positions to the video frame.
 * @param {number} dtSec  Seconds since the last prediction step.
 */
APP.core.tracker.predictTracks = function (dtSec) {
  const { state } = APP.core;
  const { $ } = APP.core.utils;
  const videoEngage = $("#videoEngage");
  if (!videoEngage) return;
  const w = videoEngage.videoWidth || state.frame.w;
  const h = videoEngage.videoHeight || state.frame.h;
  const clamp = (val, min, max) => Math.min(max, Math.max(min, val));
  state.tracker.tracks.forEach(tr => {
    if (tr.killed) return;
    // FIX: tracks installed by syncWithBackend carry no vx/vy; an
    // undefined velocity made `x + vx * dt` NaN, and clamp(NaN, ...)
    // stays NaN — the box silently vanished. Treat missing velocity
    // as stationary.
    const vx = Number.isFinite(tr.vx) ? tr.vx : 0;
    const vy = Number.isFinite(tr.vy) ? tr.vy : 0;
    // 0.12 damping factor keeps dead-reckoning conservative.
    tr.bbox.x = clamp(tr.bbox.x + vx * dtSec * 0.12, 0, w - 1);
    tr.bbox.y = clamp(tr.bbox.y + vy * dtSec * 0.12, 0, h - 1);
  });
};