Zhen Ye committed on
Commit
dc409c8
·
1 Parent(s): 3bcc90a

fix: add wget to Dockerfile, cleanup js errors, and remove GPU env override

Browse files
Dockerfile CHANGED
@@ -8,6 +8,7 @@ WORKDIR /app
8
  COPY requirements.txt ./
9
  RUN apt-get update && apt-get install -y --no-install-recommends \
10
  git \
 
11
  libgl1 \
12
  libglib2.0-0 \
13
  ffmpeg \
 
8
  COPY requirements.txt ./
9
  RUN apt-get update && apt-get install -y --no-install-recommends \
10
  git \
11
+ wget \
12
  libgl1 \
13
  libglib2.0-0 \
14
  ffmpeg \
LaserPerception/LaserPerception.css CHANGED
@@ -700,7 +700,7 @@ input[type="number"]:focus {
700
  .frame-grid {
701
  display: grid;
702
  grid-template-columns: 1.6fr .9fr;
703
- grid-template-rows: auto auto 240px;
704
  gap: 12px;
705
  min-height: 0;
706
  }
 
700
  .frame-grid {
701
  display: grid;
702
  grid-template-columns: 1.6fr .9fr;
703
+ grid-template-rows: auto auto 1fr;
704
  gap: 12px;
705
  min-height: 0;
706
  }
LaserPerception/LaserPerception.js CHANGED
@@ -142,7 +142,8 @@
142
  const frameEmpty = $("#frameEmpty");
143
  const frameNote = $("#frameNote");
144
 
145
- const objList = $("#objList");
 
146
  const objCount = $("#objCount");
147
  const featureTable = $("#featureTable");
148
  const selId = $("#selId");
@@ -818,6 +819,33 @@
818
  const kind = sel.kind;
819
  const videoFile = state.videoFile;
820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
821
  if (!videoFile) {
822
  throw new Error("No video loaded");
823
  }
@@ -925,6 +953,46 @@
925
  log("First frame ready (no detections payload)", "t");
926
  }
927
  displayAsyncFirstFrame();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
  }
929
 
930
  log(`Backend job ID: ${data.job_id} (polling every 3s)`, "t");
@@ -1943,7 +2011,7 @@
1943
  if (state.ui.cursorMode === "on") {
1944
  moveCursorToRect(btnReason.getBoundingClientRect());
1945
  setTimeout(() => moveCursorToRect(frameCanvas.getBoundingClientRect()), 260);
1946
- setTimeout(() => moveCursorToRect(objList.getBoundingClientRect()), 560);
1947
  // setTimeout(() => moveCursorToRect(summaryTable.getBoundingClientRect()), 880);
1948
  }
1949
 
@@ -2113,7 +2181,7 @@
2113
  async function recomputeHEL() {
2114
  if (!state.detections.length) return;
2115
  const knobs = getKnobs();
2116
- summaryStamp.textContent = "Computing...";
2117
 
2118
  try {
2119
  const result = await externalHEL(state.detections, knobs);
@@ -2151,7 +2219,7 @@
2151
  console.error("HEL recompute failed", err);
2152
  }
2153
 
2154
- summaryStamp.textContent = new Date().toLocaleTimeString();
2155
  // renderSummary();
2156
  refreshTradeTargets();
2157
  }
@@ -2233,7 +2301,7 @@
2233
 
2234
  card.innerHTML = `
2235
  <div class="track-card-header">
2236
- <span>${id} · ${det.label} ${gptBadge}</span>
2237
  <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
2238
  </div>
2239
  <div class="track-card-meta">
@@ -3002,107 +3070,190 @@
3002
  const ctx = frameRadar.getContext("2d");
3003
  const rect = frameRadar.getBoundingClientRect();
3004
  const dpr = devicePixelRatio || 1;
 
 
3005
  const targetW = Math.max(1, Math.floor(rect.width * dpr));
3006
  const targetH = Math.max(1, Math.floor(rect.height * dpr));
3007
-
3008
  if (frameRadar.width !== targetW || frameRadar.height !== targetH) {
3009
  frameRadar.width = targetW;
3010
  frameRadar.height = targetH;
3011
  }
3012
 
3013
  const w = frameRadar.width, h = frameRadar.height;
 
 
 
3014
  ctx.clearRect(0, 0, w, h);
3015
 
3016
- // Background
3017
- ctx.fillStyle = "rgba(0,0,0,.35)";
3018
  ctx.fillRect(0, 0, w, h);
3019
 
3020
- const cx = w * 0.5, cy = h * 0.5;
3021
- const R = Math.min(w, h) * 0.42;
3022
-
3023
- // Rings
3024
- ctx.strokeStyle = "rgba(255,255,255,.10)";
3025
  ctx.lineWidth = 1;
3026
  for (let i = 1; i <= 4; i++) {
3027
  ctx.beginPath();
3028
- ctx.arc(cx, cy, R * i / 4, 0, Math.PI * 2);
3029
  ctx.stroke();
3030
  }
3031
 
3032
- // Crosshair
3033
- ctx.beginPath(); ctx.moveTo(cx - R, cy); ctx.lineTo(cx + R, cy); ctx.stroke();
3034
- ctx.beginPath(); ctx.moveTo(cx, cy - R); ctx.lineTo(cx, cy + R); ctx.stroke();
 
 
 
 
3035
 
3036
- // Sweep Animation
3037
- const t = now() / 1000;
3038
- const ang = (t * 0.65) % (Math.PI * 2);
3039
- ctx.strokeStyle = "rgba(34,211,238,.22)";
3040
- ctx.lineWidth = 2;
 
 
 
 
 
 
 
 
 
 
 
 
 
3041
  ctx.beginPath();
3042
  ctx.moveTo(cx, cy);
3043
  ctx.lineTo(cx + Math.cos(ang) * R, cy + Math.sin(ang) * R);
3044
  ctx.stroke();
3045
 
3046
- // Ownship
3047
- ctx.fillStyle = "rgba(34,211,238,.85)";
3048
  ctx.beginPath();
3049
- ctx.arc(cx, cy, 5, 0, Math.PI * 2);
3050
  ctx.fill();
 
 
 
 
 
 
3051
 
3052
- // Draw Detections from State
3053
  if (state.detections) {
3054
- state.detections.forEach((det, i) => {
3055
- // Skip if no box
3056
- if (!det.box) return;
3057
-
3058
- // Calculate Range
3059
- let range_m = 1500; // fallback
3060
- if (det.depth_est_m) range_m = det.depth_est_m;
3061
- else if (det.gpt_distance_m) range_m = det.gpt_distance_m;
3062
- else {
3063
- // Area fallback
3064
- // det.box is [x1, y1, x2, y2]
3065
- const [x1, y1, x2, y2] = det.box;
3066
- const bw = x2 - x1;
3067
- const bh = y2 - y1;
3068
- const area = (bw * bh) / (state.frame.w * state.frame.h);
3069
- // Rough heuristic matching default
3070
- range_m = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
 
3071
  }
3072
 
3073
- // Calculate Bearing
3074
- const [x1, y1, x2, y2] = det.box;
3075
- const bx = (x1 + x2) / 2;
3076
- const by = (y1 + y2) / 2;
3077
-
3078
- const tx = bx / state.frame.w - 0.5;
3079
- const ty = by / state.frame.h - 0.5;
3080
- const bearing = Math.atan2(ty, tx);
3081
-
3082
- // Polar to Cartesian
3083
- const maxRange = 6000; // align with radar scale
3084
- const normRange = clamp(range_m / maxRange, 0.05, 1.0);
3085
- const rad = normRange * (R / 1.0); // full R = maxRange
3086
-
3087
- // Draw Blip
3088
- const px = cx + Math.cos(bearing) * rad;
3089
- const py = cy + Math.sin(bearing) * rad;
3090
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3091
 
3092
- ctx.fillStyle = "rgba(124,58,237,.9)"; // default purple
3093
- if (det.label === "person") ctx.fillStyle = "rgba(239,68,68,.9)"; // red
 
 
 
 
3094
 
 
3095
  ctx.beginPath();
3096
- ctx.arc(px, py, 4, 0, Math.PI * 2);
3097
  ctx.fill();
3098
 
3099
- // Label
3100
- ctx.fillStyle = "rgba(255,255,255,.75)";
3101
- ctx.font = "10px " + getComputedStyle(document.body).fontFamily;
3102
- ctx.fillText(`${det.label} ${Math.round(range_m)}m`, px + 6, py + 3);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3103
  });
3104
  }
3105
-
3106
  requestAnimationFrame(renderFrameRadar);
3107
  }
3108
  // Start loop immediately
 
142
  const frameEmpty = $("#frameEmpty");
143
  const frameNote = $("#frameNote");
144
 
145
+ // const objList = $("#objList"); // Removed
146
+ // const objList = $("#objList"); // Removed
147
  const objCount = $("#objCount");
148
  const featureTable = $("#featureTable");
149
  const selId = $("#selId");
 
819
  const kind = sel.kind;
820
  const videoFile = state.videoFile;
821
 
822
+ // Reset State & UI for new run
823
+ state.detections = [];
824
+ state.selectedId = null;
825
+ state.tracker.tracks = []; // Clear tracking state too
826
+
827
+ // Clear cached backend results so they don't reappear
828
+ state.hf.firstFrameDetections = null;
829
+
830
+ // Explicitly clear UI using standard renderers
831
+ renderFrameTrackList();
832
+ renderFrameOverlay();
833
+ // Force a clear of the radar canvas (renderFrameRadar loop will pick up empty state next frame)
834
+ if (frameRadar) {
835
+ const ctx = frameRadar.getContext("2d");
836
+ ctx.clearRect(0, 0, frameRadar.width, frameRadar.height);
837
+ }
838
+
839
+ // Clear counts
840
+ if (trackCount) trackCount.textContent = "0";
841
+ if (objCount) objCount.textContent = "0";
842
+
843
+ // Show loading state in list manually if needed, or let renderFrameTrackList handle it (it shows "No objects tracked")
844
+ // But we want "Computing..."
845
+ if (frameTrackList) frameTrackList.innerHTML = '<div style="font-style:italic; color:var(--text-dim); text-align:center; margin-top:20px;">Computing...</div>';
846
+
847
+ renderFeatures(null); // Clear feature panel
848
+
849
  if (!videoFile) {
850
  throw new Error("No video loaded");
851
  }
 
953
  log("First frame ready (no detections payload)", "t");
954
  }
955
  displayAsyncFirstFrame();
956
+
957
+ // Populate state.detections with backend results so Radar and Cards work
958
+ if (state.hf.firstFrameDetections) {
959
+ state.detections = state.hf.firstFrameDetections.map((d, i) => {
960
+ const id = `T${String(i + 1).padStart(2, '0')}`;
961
+ const [x1, y1, x2, y2] = d.bbox || [0, 0, 0, 0];
962
+ const w = x2 - x1;
963
+ const h = y2 - y1;
964
+ const ap = defaultAimpoint(d.label); // Ensure defaultAimpoint is accessible
965
+
966
+ return {
967
+ id,
968
+ label: d.label,
969
+ score: d.score,
970
+ bbox: { x: x1, y: y1, w: w, h: h },
971
+ aim: { ...ap },
972
+ features: null,
973
+ baseRange_m: d.gpt_distance_m || d.depth_est_m || null, // Priority to GPT/Depth
974
+ baseAreaFrac: null,
975
+ baseDwell_s: null,
976
+ reqP_kW: null,
977
+ maxP_kW: null,
978
+ pkill: null,
979
+ // Pass through backend props
980
+ gpt_distance_m: d.gpt_distance_m,
981
+ gpt_direction: d.gpt_direction,
982
+ gpt_description: d.gpt_description,
983
+ depth_est_m: d.depth_est_m,
984
+ depth_rel: d.depth_rel,
985
+ depth_valid: !!(d.depth_est_m || d.gpt_distance_m)
986
+ };
987
+ });
988
+ // Update UI components
989
+ log(`Populating UI with ${state.detections.length} tracked objects`, "t");
990
+ renderFrameTrackList();
991
+ renderFrameRadar();
992
+ renderFeatures(null);
993
+ renderTrade();
994
+ renderFrameOverlay();
995
+ }
996
  }
997
 
998
  log(`Backend job ID: ${data.job_id} (polling every 3s)`, "t");
 
2011
  if (state.ui.cursorMode === "on") {
2012
  moveCursorToRect(btnReason.getBoundingClientRect());
2013
  setTimeout(() => moveCursorToRect(frameCanvas.getBoundingClientRect()), 260);
2014
+ setTimeout(() => moveCursorToRect(frameTrackList.getBoundingClientRect()), 560);
2015
  // setTimeout(() => moveCursorToRect(summaryTable.getBoundingClientRect()), 880);
2016
  }
2017
 
 
2181
  async function recomputeHEL() {
2182
  if (!state.detections.length) return;
2183
  const knobs = getKnobs();
2184
+ // summaryStamp.textContent = "Computing...";
2185
 
2186
  try {
2187
  const result = await externalHEL(state.detections, knobs);
 
2219
  console.error("HEL recompute failed", err);
2220
  }
2221
 
2222
+ // summaryStamp.textContent = new Date().toLocaleTimeString();
2223
  // renderSummary();
2224
  refreshTradeTargets();
2225
  }
 
2301
 
2302
  card.innerHTML = `
2303
  <div class="track-card-header">
2304
+ <span>${id} · ${det.label}</span>
2305
  <span class="badgemini">${(det.score * 100).toFixed(0)}%</span>
2306
  </div>
2307
  <div class="track-card-meta">
 
3070
  const ctx = frameRadar.getContext("2d");
3071
  const rect = frameRadar.getBoundingClientRect();
3072
  const dpr = devicePixelRatio || 1;
3073
+
3074
+ // Resize if needed
3075
  const targetW = Math.max(1, Math.floor(rect.width * dpr));
3076
  const targetH = Math.max(1, Math.floor(rect.height * dpr));
 
3077
  if (frameRadar.width !== targetW || frameRadar.height !== targetH) {
3078
  frameRadar.width = targetW;
3079
  frameRadar.height = targetH;
3080
  }
3081
 
3082
  const w = frameRadar.width, h = frameRadar.height;
3083
+ const cx = w * 0.5, cy = h * 0.5;
3084
+ const R = Math.min(w, h) * 0.45; // Max radius
3085
+
3086
  ctx.clearRect(0, 0, w, h);
3087
 
3088
+ // --- 1. Background (Tactical Grid) ---
3089
+ ctx.fillStyle = "#0a0f22"; // Matches --panel2
3090
  ctx.fillRect(0, 0, w, h);
3091
 
3092
+ // Grid Rings (Concentric)
3093
+ ctx.strokeStyle = "rgba(34, 211, 238, 0.1)"; // Cyan faint
 
 
 
3094
  ctx.lineWidth = 1;
3095
  for (let i = 1; i <= 4; i++) {
3096
  ctx.beginPath();
3097
+ ctx.arc(cx, cy, R * (i / 4), 0, Math.PI * 2);
3098
  ctx.stroke();
3099
  }
3100
 
3101
+ // Grid Spokes (Cross + Diagonals)
3102
+ ctx.beginPath();
3103
+ // Cardinals
3104
+ ctx.moveTo(cx - R, cy); ctx.lineTo(cx + R, cy);
3105
+ ctx.moveTo(cx, cy - R); ctx.lineTo(cx, cy + R);
3106
+ // Diagonals (optional, maybe too busy? let's stick to cleaning cardinals)
3107
+ ctx.stroke();
3108
 
3109
+ // --- 2. Sweep Animation ---
3110
+ const t = now() / 1500; // Slower, more deliberate sweep
3111
+ const ang = (t * (Math.PI * 2)) % (Math.PI * 2);
3112
+
3113
+ const grad = ctx.createConicGradient(ang + Math.PI / 2, cx, cy); // Offset to start at 0
3114
+ grad.addColorStop(0, "transparent");
3115
+ grad.addColorStop(0.1, "transparent");
3116
+ grad.addColorStop(0.8, "rgba(34, 211, 238, 0.0)");
3117
+ grad.addColorStop(1, "rgba(34, 211, 238, 0.15)"); // Trailing edge
3118
+
3119
+ ctx.fillStyle = grad;
3120
+ ctx.beginPath();
3121
+ ctx.arc(cx, cy, R, 0, Math.PI * 2);
3122
+ ctx.fill();
3123
+
3124
+ // Scan Line
3125
+ ctx.strokeStyle = "rgba(34, 211, 238, 0.6)";
3126
+ ctx.lineWidth = 1.5;
3127
  ctx.beginPath();
3128
  ctx.moveTo(cx, cy);
3129
  ctx.lineTo(cx + Math.cos(ang) * R, cy + Math.sin(ang) * R);
3130
  ctx.stroke();
3131
 
3132
+ // --- 3. Ownship (Center) ---
3133
+ ctx.fillStyle = "#22d3ee"; // Cyan
3134
  ctx.beginPath();
3135
+ ctx.arc(cx, cy, 3, 0, Math.PI * 2);
3136
  ctx.fill();
3137
+ // Ring around ownship
3138
+ ctx.strokeStyle = "rgba(34, 211, 238, 0.5)";
3139
+ ctx.lineWidth = 1;
3140
+ ctx.beginPath();
3141
+ ctx.arc(cx, cy, 6, 0, Math.PI * 2);
3142
+ ctx.stroke();
3143
 
3144
+ // --- 4. Render Detections ---
3145
  if (state.detections) {
3146
+ state.detections.forEach(det => {
3147
+ // Determine Range (pixels)
3148
+ // Map logical range (meters) to graphical range (0..R)
3149
+ let rangeVal = 3000; // default max scale in meters
3150
+ let dist = 1000; // default unknown
3151
+
3152
+ if (det.gpt_distance_m) dist = det.gpt_distance_m;
3153
+ else if (det.depth_est_m) dist = det.depth_est_m;
3154
+ else if (det.box) {
3155
+ // Fallback area heuristic
3156
+ // In hfAsync, we stored bbox: {x,y,w,h} in PIXELS.
3157
+
3158
+ // We need frame dimensions.
3159
+ const fw = state.frame.w || 1280;
3160
+ const fh = state.frame.h || 720;
3161
+ const area = (det.bbox.w * det.bbox.h) / (fw * fh);
3162
+ // Heuristic: Area 0.01 => ~2000m, Area 0.5 => ~280m
3163
+ dist = clamp(200 / Math.sqrt(Math.max(1e-6, area)), 50, 6000);
3164
  }
3165
 
3166
+ // Log scale or Linear? Linear is easier for users to map.
3167
+ // Let's use linear: 0m -> 0px, 1500m -> R
3168
+ const maxRangeM = 1500;
3169
+ const rPx = (clamp(dist, 0, maxRangeM) / maxRangeM) * R;
3170
+
3171
+ // Determine Bearing
3172
+ // box center relative to frame center
3173
+ const bx = det.bbox.x + det.bbox.w * 0.5;
3174
+ const fw = state.frame.w || 1280;
3175
+ const tx = (bx / fw) - 0.5; // -0.5 (left) to 0.5 (right)
3176
+ // Map x-axis (-0.5 to 0.5) to angle.
3177
+ // FOV assumption: ~60 degrees?
3178
+ const fovRad = (60 * Math.PI) / 180;
3179
+ // Actually canvas 0 is Right (0 rad).
3180
+ // We want Up (-PI/2) to be center.
3181
+ // So center (tx=0) should be -PI/2.
3182
+ // Left (tx=-0.5) => -PI/2 - fov/2.
3183
+ // Right (tx=0.5) => -PI/2 + fov/2.
3184
+ const angle = (-Math.PI / 2) + (tx * fovRad);
3185
+
3186
+ // --- Draw Blip ---
3187
+ const px = cx + Math.cos(angle) * rPx;
3188
+ const py = cy + Math.sin(angle) * rPx;
3189
+
3190
+ const isSelected = (state.selectedId === det.id);
3191
+
3192
+ // Glow for selected
3193
+ if (isSelected) {
3194
+ ctx.shadowBlur = 10;
3195
+ ctx.shadowColor = "#f59e0b"; // Amber glow
3196
+ } else {
3197
+ ctx.shadowBlur = 0;
3198
+ }
3199
 
3200
+ // Blip Color
3201
+ // If it has GPT data, maybe special color? Or just distinct per class?
3202
+ let col = "#7c3aed"; // Default violet
3203
+ if (det.label === 'person') col = "#ef4444"; // Red
3204
+ if (det.label === 'airplane') col = "#f59e0b"; // Amber
3205
+ if (isSelected) col = "#ffffff"; // White for selected
3206
 
3207
+ ctx.fillStyle = col;
3208
  ctx.beginPath();
3209
+ ctx.arc(px, py, isSelected ? 5 : 3.5, 0, Math.PI * 2);
3210
  ctx.fill();
3211
 
3212
+ // Blip Label (if selected or hovered - just show ID)
3213
+ // Just Show ID for all? Might clutter. Show for selected.
3214
+ if (isSelected) {
3215
+ ctx.fillStyle = "#fff";
3216
+ ctx.font = "bold 11px monospace";
3217
+ ctx.fillText(det.id, px + 8, py + 3);
3218
+
3219
+ // Connected Line to center
3220
+ ctx.strokeStyle = "rgba(255, 255, 255, 0.4)";
3221
+ ctx.lineWidth = 1;
3222
+ ctx.setLineDash([2, 2]); // Optional: dashed line for "targeting" feel
3223
+ ctx.beginPath();
3224
+ ctx.moveTo(cx, cy);
3225
+ ctx.lineTo(px, py);
3226
+ ctx.stroke();
3227
+ ctx.setLineDash([]); // Reset
3228
+
3229
+ // Distance Label on Line
3230
+ const mx = (cx + px) * 0.5;
3231
+ const my = (cy + py) * 0.5;
3232
+ const distStr = `${Math.round(dist)}m`;
3233
+
3234
+ ctx.font = "10px monospace";
3235
+ const tm = ctx.measureText(distStr);
3236
+ const tw = tm.width;
3237
+ const th = 10;
3238
+
3239
+ // Label Background
3240
+ ctx.fillStyle = "rgba(10, 15, 34, 0.85)";
3241
+ ctx.fillRect(mx - tw / 2 - 3, my - th / 2 - 2, tw + 6, th + 4);
3242
+
3243
+ // Label Text
3244
+ ctx.fillStyle = "#22d3ee"; // Cyan
3245
+ ctx.textAlign = "center";
3246
+ ctx.textBaseline = "middle";
3247
+ ctx.fillText(distStr, mx, my);
3248
+
3249
+ // Reset text alignment
3250
+ ctx.textAlign = "start";
3251
+ ctx.textBaseline = "alphabetic";
3252
+ }
3253
+
3254
+ ctx.shadowBlur = 0; // reset
3255
  });
3256
  }
 
3257
  requestAnimationFrame(renderFrameRadar);
3258
  }
3259
  // Start loop immediately
inference.py CHANGED
@@ -435,6 +435,7 @@ def process_first_frame(
435
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
436
  cv2.imwrite(tmp_img.name, frame)
437
  gpt_results = estimate_distance_gpt(tmp_img.name, detections)
 
438
  os.remove(tmp_img.name) # Clean up immediately
439
 
440
  # Merge GPT results into detections
@@ -503,9 +504,9 @@ def run_inference(
503
 
504
  # Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
505
  # This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
506
- if "CUDA_VISIBLE_DEVICES" in os.environ:
507
- logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
508
- del os.environ["CUDA_VISIBLE_DEVICES"]
509
 
510
  num_gpus = torch.cuda.device_count()
511
  logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
@@ -571,7 +572,7 @@ def run_inference(
571
  frame_idx, frame_data = item
572
 
573
  if frame_idx % 30 == 0:
574
- logging.info("Processing frame %d on device %s", frame_idx, "cpu" if num_gpus==0 else f"cuda:{gpu_idx}")
575
 
576
  try:
577
  # Depth strategy: Run every 3 frames
@@ -766,7 +767,7 @@ def run_segmentation(
766
  idx, frame = item
767
 
768
  if idx % 30 == 0:
769
- logging.info("Segmenting frame %d (GPU %d)", idx, gpu_idx)
770
 
771
  try:
772
  processed, _ = infer_segmentation_frame(
 
435
  with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_img:
436
  cv2.imwrite(tmp_img.name, frame)
437
  gpt_results = estimate_distance_gpt(tmp_img.name, detections)
438
+ logging.info(f"GPT Output for First Frame:\n{gpt_results}") # Expose to HF logs
439
  os.remove(tmp_img.name) # Clean up immediately
440
 
441
  # Merge GPT results into detections
 
504
 
505
  # Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
506
  # This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
507
+ # if "CUDA_VISIBLE_DEVICES" in os.environ:
508
+ # logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
509
+ # del os.environ["CUDA_VISIBLE_DEVICES"]
510
 
511
  num_gpus = torch.cuda.device_count()
512
  logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
 
572
  frame_idx, frame_data = item
573
 
574
  if frame_idx % 30 == 0:
575
+ logging.debug("Processing frame %d on device %s", frame_idx, "cpu" if num_gpus==0 else f"cuda:{gpu_idx}")
576
 
577
  try:
578
  # Depth strategy: Run every 3 frames
 
767
  idx, frame = item
768
 
769
  if idx % 30 == 0:
770
+ logging.debug("Segmenting frame %d (GPU %d)", idx, gpu_idx)
771
 
772
  try:
773
  processed, _ = infer_segmentation_frame(