Zhen Ye committed on
Commit
dc409c8
·
1 Parent(s): 3bcc90a

fix: add wget to Dockerfile, cleanup js errors, and remove GPU env override

Browse files
Dockerfile CHANGED
@@ -8,6 +8,7 @@ WORKDIR /app
8
  COPY requirements.txt ./
9
  RUN apt-get update && apt-get install -y --no-install-recommends \
10
  git \
 
11
  libgl1 \
12
  libglib2.0-0 \
13
  ffmpeg \
 
8
  COPY requirements.txt ./
9
  RUN apt-get update && apt-get install -y --no-install-recommends \
10
  git \
11
+ wget \
12
  libgl1 \
13
  libglib2.0-0 \
14
  ffmpeg \
LaserPerception/LaserPerception.css CHANGED
@@ -700,7 +700,7 @@ input[type="number"]:focus {
700
  .frame-grid {
701
  display: grid;
702
  grid-template-columns: 1.6fr .9fr;
703
- grid-template-rows: auto auto 240px;
704
  gap: 12px;
705
  min-height: 0;
706
  }
 
700
  .frame-grid {
701
  display: grid;
702
  grid-template-columns: 1.6fr .9fr;
703
+ grid-template-rows: auto auto 1fr;
704
  gap: 12px;
705
  min-height: 0;
706
  }
LaserPerception/LaserPerception.js CHANGED
@@ -142,7 +142,8 @@
142
  const frameEmpty = $("#frameEmpty");
143
  const frameNote = $("#frameNote");
144
 
145
- const objList = $("#objList");
 
146
  const objCount = $("#objCount");
147
  const featureTable = $("#featureTable");
148
  const selId = $("#selId");
@@ -818,6 +819,33 @@
818
  const kind = sel.kind;
819
  const videoFile = state.videoFile;
820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
  if (!videoFile) {
822
  throw new Error("No video loaded");
823
  }
@@ -925,6 +953,46 @@
925
  log("First frame ready (no detections payload)", "t");
926
  }
927
  displayAsyncFirstFrame();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
  }
929
 
930
  log(`Backend job ID: ${data.job_id} (polling every 3s)`, "t");
@@ -1943,7 +2011,7 @@
1943
  if (state.ui.cursorMode === "on") {
1944
  moveCursorToRect(btnReason.getBoundingClientRect());
1945
  setTimeout(() => moveCursorToRect(frameCanvas.getBoundingClientRect()), 260);
1946
- setTimeout(() => moveCursorToRect(objList.getBoundingClientRect()), 560);
1947
  // setTimeout(() => moveCursorToRect(summaryTable.getBoundingClientRect()), 880);
1948
  }
1949
 
@@ -2113,7 +2181,7 @@
2113
  async function recomputeHEL() {
2114
  if (!state.detections.length) return;
2115
  const knobs = getKnobs();
2116
- summaryStamp.textContent = "Computing...";
2117
 
2118
  try {
2119
  const result = await externalHEL(state.detections, knobs);
@@ -2151,7 +2219,7 @@
2151
  console.error("HEL recompute failed", err);
2152
  }
2153
 
2154
- summaryStamp.textContent = new Date().toLocaleTimeString();
2155
  // renderSummary();
2156
  refreshTradeTargets();
2157
  }
@@ -2233,7 +2301,7 @@
2233
 
2234
  card.innerHTML = `
2235
  <div class="track-card-header">
2236
- <span>${id} · ${det.label} ${gptBadge}</span>
2237
  <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
2238
  </div>
2239
  <div class="track-card-meta">
@@ -3002,107 +3070,190 @@
3002
  const ctx = frameRadar.getContext("2d");
3003
  const rect = frameRadar.getBoundingClientRect();
3004
  const dpr = devicePixelRatio || 1;
 
 
3005
  const targetW = Math.max(1, Math.floor(rect.width * dpr));
3006
  const targetH = Math.max(1, Math.floor(rect.height * dpr));
3007
-
3008
  if (frameRadar.width !== targetW || frameRadar.height !== targetH) {
3009
  frameRadar.width = targetW;
3010
  frameRadar.height = targetH;
3011
  }
3012
 
3013
  const w = frameRadar.width, h = frameRadar.height;
 
 
 
3014
  ctx.clearRect(0, 0, w, h);
3015
 
3016
- // Background
3017
- ctx.fillStyle = "rgba(0,0,0,.35)";
3018
  ctx.fillRect(0, 0, w, h);
3019
 
3020
- const cx = w * 0.5, cy = h * 0.5;
3021
- const R = Math.min(w, h) * 0.42;
3022
-
3023
- // Rings
3024
- ctx.strokeStyle = "rgba(255,255,255,.10)";
3025
  ctx.lineWidth = 1;
3026
  for (let i = 1; i <= 4; i++) {
3027
  ctx.beginPath();
3028
- ctx.arc(cx, cy, R * i / 4, 0, Math.PI * 2);
3029
  ctx.stroke();
3030
  }
3031
 
3032
- // Crosshair
3033
- ctx.beginPath(); ctx.moveTo(cx - R, cy); ctx.lineTo(cx + R, cy); ctx.stroke();
3034
- ctx.beginPath(); ctx.moveTo(cx, cy - R); ctx.lineTo(cx, cy + R); ctx.stroke();
 
 
 
 
3035
 
3036
- // Sweep Animation
3037
- const t = now() / 1000;
3038
- const ang = (t * 0.65) % (Math.PI * 2);
3039
- ctx.strokeStyle = "rgba(34,211,238,.22)";
3040
- ctx.lineWidth = 2;
 
 
 
 
 
 
 
 
 
 
 
 
 
3041
  ctx.beginPath();
3042
  ctx.moveTo(cx, cy);
3043
  ctx.lineTo(cx + Math.cos(ang) * R, cy + Math.sin(ang) * R);
3044
  ctx.stroke();
3045
 
3046
- // Ownship
3047
- ctx.fillStyle = "rgba(34,211,238,.85)";
3048
  ctx.beginPath();
3049
- ctx.arc(cx, cy, 5, 0, Math.PI * 2);
3050
  ctx.fill();
 
 
 
 
 
 
3051
 
3052
- // Draw Detections from State
3053
  if (state.detections) {
3054
- state.detections.forEach((det, i) => {
3055
- // Skip if no box
3056
- if (!det.box) return;
3057
-
3058
- // Calculate Range
3059
- let range_m = 1500; // fallback
3060
- if (det.depth_est_m) range_m = det.depth_est_m;
3061
- else if (det.gpt_distance_m) range_m = det.gpt_distance_m;
3062
- else {
3063
- // Area fallback
3064
- // det.box is [x1, y1, x2, y2]
3065
- const [x1, y1, x2, y2] = det.box;
3066
- const bw = x2 - x1;
3067
- const bh = y2 - y1;
3068
- const area = (bw * bh) / (state.frame.w * state.frame.h);
3069
- // Rough heuristic matching default
3070
- range_m = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
 
3071
  }
3072
 
3073
- // Calculate Bearing
3074
- const [x1, y1, x2, y2] = det.box;
3075
- const bx = (x1 + x2) / 2;
3076
- const by = (y1 + y2) / 2;
3077
-
3078
- const tx = bx / state.frame.w - 0.5;
3079
- const ty = by / state.frame.h - 0.5;
3080
- const bearing = Math.atan2(ty, tx);
3081
-
3082
- // Polar to Cartesian
3083
- const maxRange = 6000; // align with radar scale
3084
- const normRange = clamp(range_m / maxRange, 0.05, 1.0);
3085
- const rad = normRange * (R / 1.0); // full R = maxRange
3086
-
3087
- // Draw Blip
3088
- const px = cx + Math.cos(bearing) * rad;
3089
- const py = cy + Math.sin(bearing) * rad;
3090
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3091
 
3092
- ctx.fillStyle = "rgba(124,58,237,.9)"; // default purple
3093
- if (det.label === "person") ctx.fillStyle = "rgba(239,68,68,.9)"; // red
 
 
 
 
3094
 
 
3095
  ctx.beginPath();
3096
- ctx.arc(px, py, 4, 0, Math.PI * 2);
3097
  ctx.fill();
3098
 
3099
- // Label
3100
- ctx.fillStyle = "rgba(255,255,255,.75)";
3101
- ctx.font = "10px " + getComputedStyle(document.body).fontFamily;
3102
- ctx.fillText(`${det.label} ${Math.round(range_m)}m`, px + 6, py + 3);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3103
  });
3104
  }
3105
-
3106
  requestAnimationFrame(renderFrameRadar);
3107
  }
3108
  // Start loop immediately
 
142
  const frameEmpty = $("#frameEmpty");
143
  const frameNote = $("#frameNote");
144
 
145
+ // const objList = $("#objList"); // Removed
146
+ // const objList = $("#objList"); // Removed
147
  const objCount = $("#objCount");
148
  const featureTable = $("#featureTable");
149
  const selId = $("#selId");
 
819
  const kind = sel.kind;
820
  const videoFile = state.videoFile;
821
 
822
+ // Reset State & UI for new run
823
+ state.detections = [];
824
+ state.selectedId = null;
825
+ state.tracker.tracks = []; // Clear tracking state too
826
+
827
+ // Clear cached backend results so they don't reappear
828
+ state.hf.firstFrameDetections = null;
829
+
830
+ // Explicitly clear UI using standard renderers
831
+ renderFrameTrackList();
832
+ renderFrameOverlay();
833
+ // Force a clear of the radar canvas (renderFrameRadar loop will pick up empty state next frame)
834
+ if (frameRadar) {
835
+ const ctx = frameRadar.getContext("2d");
836
+ ctx.clearRect(0, 0, frameRadar.width, frameRadar.height);
837
+ }
838
+
839
+ // Clear counts
840
+ if (trackCount) trackCount.textContent = "0";
841
+ if (objCount) objCount.textContent = "0";
842
+
843
+ // Show loading state in list manually if needed, or let renderFrameTrackList handle it (it shows "No objects tracked")
844
+ // But we want "Computing..."
845
+ if (frameTrackList) frameTrackList.innerHTML = '<div style="font-style:italic; color:var(--text-dim); text-align:center; margin-top:20px;">Computing...</div>';
846
+
847
+ renderFeatures(null); // Clear feature panel
848
+
849
  if (!videoFile) {
850
  throw new Error("No video loaded");
851
  }
 
953
  log("First frame ready (no detections payload)", "t");
954
  }
955
  displayAsyncFirstFrame();
956
+
957
+ // Populate state.detections with backend results so Radar and Cards work
958
+ if (state.hf.firstFrameDetections) {
959
+ state.detections = state.hf.firstFrameDetections.map((d, i) => {
960
+ const id = `T${String(i + 1).padStart(2, '0')}`;
961
+ const [x1, y1, x2, y2] = d.bbox || [0, 0, 0, 0];
962
+ const w = x2 - x1;
963
+ const h = y2 - y1;
964
+ const ap = defaultAimpoint(d.label); // Ensure defaultAimpoint is accessible
965
+
966
+ return {
967
+ id,
968
+ label: d.label,
969
+ score: d.score,
970
+ bbox: { x: x1, y: y1, w: w, h: h },
971
+ aim: { ...ap },
972
+ features: null,
973
+ baseRange_m: d.gpt_distance_m || d.depth_est_m || null, // Priority to GPT/Depth
974
+ baseAreaFrac: null,
975
+ baseDwell_s: null,
976
+ reqP_kW: null,
977
+ maxP_kW: null,
978
+ pkill: null,
979
+ // Pass through backend props
980
+ gpt_distance_m: d.gpt_distance_m,
981
+ gpt_direction: d.gpt_direction,
982
+ gpt_description: d.gpt_description,
983
+ depth_est_m: d.depth_est_m,
984
+ depth_rel: d.depth_rel,
985
+ depth_valid: !!(d.depth_est_m || d.gpt_distance_m)
986
+ };
987
+ });
988
+ // Update UI components
989
+ log(`Populating UI with ${state.detections.length} tracked objects`, "t");
990
+ renderFrameTrackList();
991
+ renderFrameRadar();
992
+ renderFeatures(null);
993
+ renderTrade();
994
+ renderFrameOverlay();
995
+ }
996
  }
997
 
998
  log(`Backend job ID: ${data.job_id} (polling every 3s)`, "t");
 
2011
  if (state.ui.cursorMode === "on") {
2012
  moveCursorToRect(btnReason.getBoundingClientRect());
2013
  setTimeout(() => moveCursorToRect(frameCanvas.getBoundingClientRect()), 260);
2014
+ setTimeout(() => moveCursorToRect(frameTrackList.getBoundingClientRect()), 560);
2015
  // setTimeout(() => moveCursorToRect(summaryTable.getBoundingClientRect()), 880);
2016
  }
2017
 
 
2181
  async function recomputeHEL() {
2182
  if (!state.detections.length) return;
2183
  const knobs = getKnobs();
2184
+ // summaryStamp.textContent = "Computing...";
2185
 
2186
  try {
2187
  const result = await externalHEL(state.detections, knobs);
 
2219
  console.error("HEL recompute failed", err);
2220
  }
2221
 
2222
+ // summaryStamp.textContent = new Date().toLocaleTimeString();
2223
  // renderSummary();
2224
  refreshTradeTargets();
2225
  }
 
2301
 
2302
  card.innerHTML = `
2303
  <div class="track-card-header">
2304
+ <span>${id} · ${det.label}</span>
2305
  <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
2306
  </div>
2307
  <div class="track-card-meta">
 
3070
  const ctx = frameRadar.getContext("2d");
3071
  const rect = frameRadar.getBoundingClientRect();
3072
  const dpr = devicePixelRatio || 1;
3073
+
3074
+ // Resize if needed
3075
  const targetW = Math.max(1, Math.floor(rect.width * dpr));
3076
  const targetH = Math.max(1, Math.floor(rect.height * dpr));
 
3077
  if (frameRadar.width !== targetW || frameRadar.height !== targetH) {
3078
  frameRadar.width = targetW;
3079
  frameRadar.height = targetH;
3080
  }
3081
 
3082
  const w = frameRadar.width, h = frameRadar.height;
3083
+ const cx = w * 0.5, cy = h * 0.5;
3084
+ const R = Math.min(w, h) * 0.45; // Max radius
3085
+
3086
  ctx.clearRect(0, 0, w, h);
3087
 
3088
+ // --- 1. Background (Tactical Grid) ---
3089
+ ctx.fillStyle = "#0a0f22"; // Matches --panel2
3090
  ctx.fillRect(0, 0, w, h);
3091
 
3092
+ // Grid Rings (Concentric)
3093
+ ctx.strokeStyle = "rgba(34, 211, 238, 0.1)"; // Cyan faint
 
 
 
3094
  ctx.lineWidth = 1;
3095
  for (let i = 1; i <= 4; i++) {
3096
  ctx.beginPath();
3097
+ ctx.arc(cx, cy, R * (i / 4), 0, Math.PI * 2);
3098
  ctx.stroke();
3099
  }
3100
 
3101
+ // Grid Spokes (Cross + Diagonals)
3102
+ ctx.beginPath();
3103
+ // Cardinals
3104
+ ctx.moveTo(cx - R, cy); ctx.lineTo(cx + R, cy);
3105
+ ctx.moveTo(cx, cy - R); ctx.lineTo(cx, cy + R);
3106
+ // Diagonals (optional, maybe too busy? let's stick to cleaning cardinals)
3107
+ ctx.stroke();
3108
 
3109
+ // --- 2. Sweep Animation ---
3110
+ const t = now() / 1500; // Slower, more deliberate sweep
3111
+ const ang = (t * (Math.PI * 2)) % (Math.PI * 2);
3112
+
3113
+ const grad = ctx.createConicGradient(ang + Math.PI / 2, cx, cy); // Offset to start at 0
3114
+ grad.addColorStop(0, "transparent");
3115
+ grad.addColorStop(0.1, "transparent");
3116
+ grad.addColorStop(0.8, "rgba(34, 211, 238, 0.0)");
3117
+ grad.addColorStop(1, "rgba(34, 211, 238, 0.15)"); // Trailing edge
3118
+
3119
+ ctx.fillStyle = grad;
3120
+ ctx.beginPath();
3121
+ ctx.arc(cx, cy, R, 0, Math.PI * 2);
3122
+ ctx.fill();
3123
+
3124
+ // Scan Line
3125
+ ctx.strokeStyle = "rgba(34, 211, 238, 0.6)";
3126
+ ctx.lineWidth = 1.5;
3127
  ctx.beginPath();
3128
  ctx.moveTo(cx, cy);
3129
  ctx.lineTo(cx + Math.cos(ang) * R, cy + Math.sin(ang) * R);
3130
  ctx.stroke();
3131
 
3132
+ // --- 3. Ownship (Center) ---
3133
+ ctx.fillStyle = "#22d3ee"; // Cyan
3134
  ctx.beginPath();
3135
+ ctx.arc(cx, cy, 3, 0, Math.PI * 2);
3136
  ctx.fill();
3137
+ // Ring around ownship
3138
+ ctx.strokeStyle = "rgba(34, 211, 238, 0.5)";
3139
+ ctx.lineWidth = 1;
3140
+ ctx.beginPath();
3141
+ ctx.arc(cx, cy, 6, 0, Math.PI * 2);
3142
+ ctx.stroke();
3143
 
3144
+ // --- 4. Render Detections ---
3145
  if (state.detections) {
3146
+ state.detections.forEach(det => {
3147
+ // Determine Range (pixels)
3148
+ // Map logical range (meters) to graphical range (0..R)
3149
+ let rangeVal = 3000; // default max scale in meters
3150
+ let dist = 1000; // default unknown
3151
+
3152
+ if (det.gpt_distance_m) dist = det.gpt_distance_m;
3153
+ else if (det.depth_est_m) dist = det.depth_est_m;
3154
+ else if (det.box) {
3155
+ // Fallback area heuristic
3156
+ // In hfAsync, we stored bbox: {x,y,w,h} in PIXELS.
3157
+
3158
+ // We need frame dimensions.
3159
+ const fw = state.frame.w || 1280;
3160
+ const fh = state.frame.h || 720;
3161
+ const area = (det.bbox.w * det.bbox.h) / (fw * fh);
3162
+ // Heuristic: Area 0.01 => ~2000m, Area 0.5 => ~280m
3163
+ dist = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
3164
  }
3165
 
3166
+ // Log scale or Linear? Linear is easier for users to map.
3167
+ // Let's use linear: 0m -> 0px, 1500m -> R
3168
+ const maxRangeM = 1500;
3169
+ const rPx = (clamp(dist, 0, maxRangeM) / maxRangeM) * R;
3170
+
3171
+ // Determine Bearing
3172
+ // box center relative to frame center
3173
+ const bx = det.bbox.x + det.bbox.w * 0.5;
3174
+ const fw = state.frame.w || 1280;
3175
+ const tx = (bx / fw) - 0.5; // -0.5 (left) to 0.5 (right)
3176
+ // Map x-axis (-0.5 to 0.5) to angle.
3177
+ // FOV assumption: ~60 degrees?
3178
+ const fovRad = (60 * Math.PI) / 180;
3179
+ // Actually canvas 0 is Right (0 rad).
3180
+ // We want Up (-PI/2) to be center.
3181
+ // So center (tx=0) should be -PI/2.
3182
+ // Left (tx=-0.5) => -PI/2 - fov/2.
3183
+ // Right (tx=0.5) => -PI/2 + fov/2.
3184
+ const angle = (-Math.PI / 2) + (tx * fovRad);
3185
+
3186
+ // --- Draw Blip ---
3187
+ const px = cx + Math.cos(angle) * rPx;
3188
+ const py = cy + Math.sin(angle) * rPx;
3189
+
3190
+ const isSelected = (state.selectedId === det.id);
3191
+
3192
+ // Glow for selected
3193
+ if (isSelected) {
3194
+ ctx.shadowBlur = 10;
3195
+ ctx.shadowColor = "#f59e0b"; // Amber glow
3196
+ } else {
3197
+ ctx.shadowBlur = 0;
3198
+ }
3199
 
3200
+ // Blip Color
3201
+ // If it has GPT data, maybe special color? Or just distinct per class?
3202
+ let col = "#7c3aed"; // Default violet
3203
+ if (det.label === 'person') col = "#ef4444"; // Red
3204
+ if (det.label === 'airplane') col = "#f59e0b"; // Amber
3205
+ if (isSelected) col = "#ffffff"; // White for selected
3206
 
3207
+ ctx.fillStyle = col;
3208
  ctx.beginPath();
3209
+ ctx.arc(px, py, isSelected ? 5 : 3.5, 0, Math.PI * 2);
3210
  ctx.fill();
3211
 
3212
+ // Blip Label (if selected or hovered - just show ID)
3213
+ // Just Show ID for all? Might clutter. Show for selected.
3214
+ if (isSelected) {
3215
+ ctx.fillStyle = "#fff";
3216
+ ctx.font = "bold 11px monospace";
3217
+ ctx.fillText(det.id, px + 8, py + 3);
3218
+
3219
+ // Connected Line to center
3220
+ ctx.strokeStyle = "rgba(255, 255, 255, 0.4)";
3221
+ ctx.lineWidth = 1;
3222
+ ctx.setLineDash([2, 2]); // Optional: dashed line for "targeting" feel
3223
+ ctx.beginPath();
3224
+ ctx.moveTo(cx, cy);
3225
+ ctx.lineTo(px, py);
3226
+ ctx.stroke();
3227
+ ctx.setLineDash([]); // Reset
3228
+
3229
+ // Distance Label on Line
3230
+ const mx = (cx + px) * 0.5;
3231
+ const my = (cy + py) * 0.5;
3232
+ const distStr = `${Math.round(dist)}m`;
3233
+
3234
+ ctx.font = "10px monospace";
3235
+ const tm = ctx.measureText(distStr);
3236
+ const tw = tm.width;
3237
+ const th = 10;
3238
+
3239
+ // Label Background
3240
+ ctx.fillStyle = "rgba(10, 15, 34, 0.85)";
3241
+ ctx.fillRect(mx - tw / 2 - 3, my - th / 2 - 2, tw + 6, th + 4);
3242
+
3243
+ // Label Text
3244
+ ctx.fillStyle = "#22d3ee"; // Cyan
3245
+ ctx.textAlign = "center";
3246
+ ctx.textBaseline = "middle";
3247
+ ctx.fillText(distStr, mx, my);
3248
+
3249
+ // Reset text alignment
3250
+ ctx.textAlign = "start";
3251
+ ctx.textBaseline = "alphabetic";
3252
+ }
3253
+
3254
+ ctx.shadowBlur = 0; // reset
3255
  });
3256
  }
 
3257
  requestAnimationFrame(renderFrameRadar);
3258
  }
3259
  // Start loop immediately
inference.py CHANGED
@@ -435,6 +435,7 @@ def process_first_frame(
435
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
436
  cv2.imwrite(tmp_img.name, frame)
437
  gpt_results = estimate_distance_gpt(tmp_img.name, detections)
 
438
  os.remove(tmp_img.name) # Clean up immediately
439
 
440
  # Merge GPT results into detections
@@ -503,9 +504,9 @@ def run_inference(
503
 
504
  # Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
505
  # This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
506
- if "CUDA_VISIBLE_DEVICES" in os.environ:
507
- logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
508
- del os.environ["CUDA_VISIBLE_DEVICES"]
509
 
510
  num_gpus = torch.cuda.device_count()
511
  logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
@@ -571,7 +572,7 @@ def run_inference(
571
  frame_idx, frame_data = item
572
 
573
  if frame_idx % 30 == 0:
574
- logging.info("Processing frame %d on device %s", frame_idx, "cpu" if num_gpus==0 else f"cuda:{gpu_idx}")
575
 
576
  try:
577
  # Depth strategy: Run every 3 frames
@@ -766,7 +767,7 @@ def run_segmentation(
766
  idx, frame = item
767
 
768
  if idx % 30 == 0:
769
- logging.info("Segmenting frame %d (GPU %d)", idx, gpu_idx)
770
 
771
  try:
772
  processed, _ = infer_segmentation_frame(
 
435
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
436
  cv2.imwrite(tmp_img.name, frame)
437
  gpt_results = estimate_distance_gpt(tmp_img.name, detections)
438
+ logging.info(f"GPT Output for First Frame:\n{gpt_results}") # Expose to HF logs
439
  os.remove(tmp_img.name) # Clean up immediately
440
 
441
  # Merge GPT results into detections
 
504
 
505
  # Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
506
  # This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
507
+ # if "CUDA_VISIBLE_DEVICES" in os.environ:
508
+ # logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
509
+ # del os.environ["CUDA_VISIBLE_DEVICES"]
510
 
511
  num_gpus = torch.cuda.device_count()
512
  logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
 
572
  frame_idx, frame_data = item
573
 
574
  if frame_idx % 30 == 0:
575
+ logging.debug("Processing frame %d on device %s", frame_idx, "cpu" if num_gpus==0 else f"cuda:{gpu_idx}")
576
 
577
  try:
578
  # Depth strategy: Run every 3 frames
 
767
  idx, frame = item
768
 
769
  if idx % 30 == 0:
770
+ logging.debug("Segmenting frame %d (GPU %d)", idx, gpu_idx)
771
 
772
  try:
773
  processed, _ = infer_segmentation_frame(