Zhen Ye committed on
Commit
391cb94
·
1 Parent(s): 970cc85

feat: fix depth inversion and add video overlays

Browse files

- Inverted depth calculation (closer=smaller distance)
- Added bounding box overlays to depth visualization video
- Pipelined detection data to depth generator
- Updated frontend radar logic (implicitly fixed by backend correction)

Files changed (3) hide show
  1. app.py +1 -1
  2. inference.py +86 -32
  3. jobs/background.py +5 -1
app.py CHANGED
@@ -232,7 +232,7 @@ async def detect_endpoint(
232
  # Run inference
233
  try:
234
  detector_name = "drone_yolo" if mode == "drone_detection" else detector
235
- output_path = run_inference(
236
  input_path,
237
  output_path,
238
  query_list,
 
232
  # Run inference
233
  try:
234
  detector_name = "drone_yolo" if mode == "drone_detection" else detector
235
+ output_path, _ = run_inference(
236
  input_path,
237
  output_path,
238
  query_list,
inference.py CHANGED
@@ -240,8 +240,18 @@ def _attach_depth_metrics(
240
  if finite.size == 0:
241
  continue
242
 
243
- depth_raw = float(np.median(finite))
244
- depth_est = depth_raw * depth_scale
 
 
 
 
 
 
 
 
 
 
245
  det["depth_est_m"] = depth_est
246
  det["depth_valid"] = True
247
  valid_depths.append(depth_est)
@@ -395,7 +405,7 @@ def run_inference(
395
  job_id: Optional[str] = None,
396
  depth_estimator_name: Optional[str] = None,
397
  depth_scale: float = 1.0,
398
- ) -> str:
399
  """
400
  Run object detection inference on a video.
401
 
@@ -469,11 +479,12 @@ def run_inference(
469
  detectors = None
470
 
471
  processed_frames_map = {}
 
472
 
473
  # Process frames
474
  if detectors:
475
  # Multi-GPU Parallel Processing
476
- def process_frame_task(frame_idx: int, frame_data: np.ndarray) -> tuple[int, np.ndarray]:
477
  # Determine which GPU to use based on frame index (round-robin)
478
  gpu_idx = frame_idx % len(detectors)
479
  detector_instance = detectors[gpu_idx]
@@ -485,7 +496,7 @@ def run_inference(
485
  active_depth_name = depth_estimator_name if (frame_idx % 3 == 0) else None
486
  active_depth_instance = depth_instance if (frame_idx % 3 == 0) else None
487
 
488
- processed, _ = infer_frame(
489
  frame_data,
490
  queries,
491
  detector_name=None, # Use instance
@@ -494,7 +505,7 @@ def run_inference(
494
  detector_instance=detector_instance,
495
  depth_estimator_instance=active_depth_instance
496
  )
497
- return frame_idx, processed
498
 
499
  # Thread pool with more workers than GPUs to keep them fed
500
  max_workers = min(len(detectors) * 2, 8)
@@ -507,15 +518,18 @@ def run_inference(
507
  futures.append(executor.submit(process_frame_task, idx, frame))
508
 
509
  for future in futures:
510
- idx, result_frame = future.result() # Wait for completion (in order or not, but we verify order)
511
  processed_frames_map[idx] = result_frame
 
512
 
513
  # Reasemble in order
514
  processed_frames = [processed_frames_map[i] for i in range(len(processed_frames_map))]
 
515
 
516
  else:
517
  # Standard Single-Threaded Loop
518
  processed_frames = []
 
519
  for idx, frame in enumerate(frames):
520
  # Check for cancellation every frame
521
  _check_cancellation(job_id)
@@ -527,7 +541,7 @@ def run_inference(
527
  # Run depth estimation every 3 frames if configured
528
  active_depth = depth_estimator_name if (idx % 3 == 0) else None
529
 
530
- processed_frame, _ = infer_frame(
531
  frame,
532
  queries,
533
  detector_name=active_detector,
@@ -535,12 +549,13 @@ def run_inference(
535
  depth_scale=depth_scale
536
  )
537
  processed_frames.append(processed_frame)
 
538
 
539
  # Write output video
540
  write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
541
  logging.info("Processed video written to: %s", output_video_path)
542
 
543
- return output_video_path
544
 
545
 
546
  def run_segmentation(
@@ -630,6 +645,7 @@ def run_segmentation(
630
  def run_depth_inference(
631
  input_video_path: str,
632
  output_video_path: str,
 
633
  max_frames: Optional[int] = None,
634
  depth_estimator_name: str = "depth",
635
  first_frame_depth_path: Optional[str] = None,
@@ -661,8 +677,8 @@ def run_depth_inference(
661
  if max_frames is not None:
662
  frames = frames[:max_frames]
663
 
664
- # Process depth with stable normalization
665
- processed_frames = process_frames_depth(frames, depth_estimator_name, job_id)
666
 
667
  # Write output video
668
  write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
@@ -680,6 +696,7 @@ def run_depth_inference(
680
  def process_frames_depth(
681
  frames: List[np.ndarray],
682
  depth_estimator_name: str,
 
683
  job_id: Optional[str] = None,
684
  ) -> List[np.ndarray]:
685
  """
@@ -791,32 +808,69 @@ def process_frames_depth(
791
 
792
  if not np.isfinite(global_min) or not np.isfinite(global_max):
793
  logging.warning("Depth percentiles are non-finite - using min/max fallback")
794
- global_min = float(valid_depths.min())
795
- global_max = float(valid_depths.max())
796
 
797
- # Handle edge case where min == max
798
- if abs(global_max - global_min) < 1e-6:
799
- global_min = float(valid_depths.min())
800
- global_max = float(valid_depths.max())
801
  if abs(global_max - global_min) < 1e-6:
802
- global_max = global_min + 1.0
803
-
804
- logging.info(
805
- "Depth range: %.2f - %.2f meters (1st-99th percentile)",
806
- global_min,
807
- global_max,
808
- )
 
 
 
809
 
810
- # Second pass: Colorize with stable normalization
811
- processed = []
812
- for idx, depth_map in enumerate(depth_maps):
813
- depth_vis = colorize_depth_map(depth_map, global_min, global_max)
814
- processed.append(depth_vis)
 
815
 
816
- if idx % 10 == 0:
817
- logging.debug("Colorized frame %d/%d", idx + 1, len(depth_maps))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
 
819
- return processed
820
 
821
 
822
  def colorize_depth_map(
 
240
  if finite.size == 0:
241
  continue
242
 
243
+ if depth_raw <= 1e-6:
244
+ det["depth_est_m"] = None
245
+ det["depth_valid"] = False
246
+ continue
247
+
248
+ # Inverted depth: closer objects have higher raw values
249
+ # Distance = Scale / RawValue
250
+ try:
251
+ depth_est = depth_scale / depth_raw
252
+ except ZeroDivisionError:
253
+ continue
254
+
255
  det["depth_est_m"] = depth_est
256
  det["depth_valid"] = True
257
  valid_depths.append(depth_est)
 
405
  job_id: Optional[str] = None,
406
  depth_estimator_name: Optional[str] = None,
407
  depth_scale: float = 1.0,
408
+ ) -> tuple[str, List[List[Dict[str, Any]]]]:
409
  """
410
  Run object detection inference on a video.
411
 
 
479
  detectors = None
480
 
481
  processed_frames_map = {}
482
+ all_detections_map = {}
483
 
484
  # Process frames
485
  if detectors:
486
  # Multi-GPU Parallel Processing
487
+ def process_frame_task(frame_idx: int, frame_data: np.ndarray) -> tuple[int, np.ndarray, List[Dict[str, Any]]]:
488
  # Determine which GPU to use based on frame index (round-robin)
489
  gpu_idx = frame_idx % len(detectors)
490
  detector_instance = detectors[gpu_idx]
 
496
  active_depth_name = depth_estimator_name if (frame_idx % 3 == 0) else None
497
  active_depth_instance = depth_instance if (frame_idx % 3 == 0) else None
498
 
499
+ processed, frame_dets = infer_frame(
500
  frame_data,
501
  queries,
502
  detector_name=None, # Use instance
 
505
  detector_instance=detector_instance,
506
  depth_estimator_instance=active_depth_instance
507
  )
508
+ return frame_idx, processed, frame_dets
509
 
510
  # Thread pool with more workers than GPUs to keep them fed
511
  max_workers = min(len(detectors) * 2, 8)
 
518
  futures.append(executor.submit(process_frame_task, idx, frame))
519
 
520
  for future in futures:
521
+ idx, result_frame, result_dets = future.result() # Wait for completion (in order or not, but we verify order)
522
  processed_frames_map[idx] = result_frame
523
+ all_detections_map[idx] = result_dets
524
 
525
  # Reasemble in order
526
  processed_frames = [processed_frames_map[i] for i in range(len(processed_frames_map))]
527
+ all_detections = [all_detections_map[i] for i in range(len(all_detections_map))]
528
 
529
  else:
530
  # Standard Single-Threaded Loop
531
  processed_frames = []
532
+ all_detections = []
533
  for idx, frame in enumerate(frames):
534
  # Check for cancellation every frame
535
  _check_cancellation(job_id)
 
541
  # Run depth estimation every 3 frames if configured
542
  active_depth = depth_estimator_name if (idx % 3 == 0) else None
543
 
544
+ processed_frame, frame_dets = infer_frame(
545
  frame,
546
  queries,
547
  detector_name=active_detector,
 
549
  depth_scale=depth_scale
550
  )
551
  processed_frames.append(processed_frame)
552
+ all_detections.append(frame_dets)
553
 
554
  # Write output video
555
  write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
556
  logging.info("Processed video written to: %s", output_video_path)
557
 
558
+ return output_video_path, all_detections
559
 
560
 
561
  def run_segmentation(
 
645
  def run_depth_inference(
646
  input_video_path: str,
647
  output_video_path: str,
648
+ detections: Optional[List[List[Dict[str, Any]]]] = None,
649
  max_frames: Optional[int] = None,
650
  depth_estimator_name: str = "depth",
651
  first_frame_depth_path: Optional[str] = None,
 
677
  if max_frames is not None:
678
  frames = frames[:max_frames]
679
 
680
+ # Process depth with stable normalization and overlay
681
+ processed_frames = process_frames_depth(frames, depth_estimator_name, detections=detections, job_id=job_id)
682
 
683
  # Write output video
684
  write_video(processed_frames, output_video_path, fps=fps, width=width, height=height)
 
696
  def process_frames_depth(
697
  frames: List[np.ndarray],
698
  depth_estimator_name: str,
699
+ detections: Optional[List[List[Dict[str, Any]]]] = None,
700
  job_id: Optional[str] = None,
701
  ) -> List[np.ndarray]:
702
  """
 
808
 
809
  if not np.isfinite(global_min) or not np.isfinite(global_max):
810
  logging.warning("Depth percentiles are non-finite - using min/max fallback")
811
+ global_min = float(valid_depths.min())
812
+ global_max = float(valid_depths.max())
813
 
814
+ # Handle edge case where min == max
 
 
 
815
  if abs(global_max - global_min) < 1e-6:
816
+ global_min = float(valid_depths.min())
817
+ global_max = float(valid_depths.max())
818
+ if abs(global_max - global_min) < 1e-6:
819
+ global_max = global_min + 1.0
820
+
821
+ logging.info(
822
+ "Depth range: %.2f - %.2f meters (1st-99th percentile)",
823
+ global_min,
824
+ global_max,
825
+ )
826
 
827
+ # Second pass: Apply colormap and overlay detections
828
+ visualization_frames = []
829
+
830
+ # draw_boxes is defined in this module, so we can use it directly.
831
+ # Ensure cv2 is imported
832
+ import cv2
833
 
834
+ for i, depth_map in enumerate(depth_maps):
835
+ _check_cancellation(job_id)
836
+
837
+ # Norm: (val - min) / (max - min) -> 0..1
838
+ # Clip to ensure range
839
+ norm_map = np.clip(depth_map, global_min, global_max)
840
+ norm_map = (norm_map - global_min) / (global_max - global_min + 1e-6)
841
+
842
+ # Invert intensity? Usually Near(High val) -> Bright(1.0).
843
+ # Our val is high for near. So direct map is fine.
844
+
845
+ # Colorize
846
+ norm_map_u8 = (norm_map * 255).astype(np.uint8)
847
+ heatmap = cv2.applyColorMap(norm_map_u8, cv2.COLORMAP_INFERNO)
848
+
849
+ # Overlay detections if available
850
+ if detections and i < len(detections):
851
+ frame_dets = detections[i]
852
+ # Convert list of dicts to format for draw_boxes
853
+ if frame_dets:
854
+ boxes = []
855
+ labels = []
856
+ display_labels = []
857
+
858
+ for d in frame_dets:
859
+ boxes.append(d.get("bbox"))
860
+ # Create label "Class Dist"
861
+ lbl = d.get("label", "obj")
862
+ # If we have depth info that was calculated in inference:
863
+ if d.get("depth_est_m"):
864
+ lbl = f"{lbl} {int(d['depth_est_m'])}m"
865
+
866
+ labels.append(lbl) # used for color
867
+ display_labels.append(lbl)
868
+
869
+ heatmap = draw_boxes(heatmap, boxes, labels, text_queries=None, display_labels=display_labels)
870
+
871
+ visualization_frames.append(heatmap)
872
 
873
+ return visualization_frames
874
 
875
 
876
  def colorize_depth_map(
jobs/background.py CHANGED
@@ -33,7 +33,8 @@ async def process_video_async(job_id: str) -> None:
33
  job_id,
34
  )
35
  else:
36
- detection_path = await asyncio.to_thread(
 
37
  run_inference,
38
  job.input_video_path,
39
  job.output_video_path,
@@ -44,6 +45,8 @@ async def process_video_async(job_id: str) -> None:
44
  job.depth_estimator_name,
45
  job.depth_scale,
46
  )
 
 
47
 
48
  # Try to run depth estimation
49
  try:
@@ -51,6 +54,7 @@ async def process_video_async(job_id: str) -> None:
51
  run_depth_inference,
52
  job.input_video_path,
53
  str(get_depth_output_path(job_id)),
 
54
  None, # max_frames
55
  job.depth_estimator_name,
56
  str(get_first_frame_depth_path(job_id)),
 
33
  job_id,
34
  )
35
  else:
36
+ detections_list = None
37
+ result_pkg = await asyncio.to_thread(
38
  run_inference,
39
  job.input_video_path,
40
  job.output_video_path,
 
45
  job.depth_estimator_name,
46
  job.depth_scale,
47
  )
48
+ # run_inference now returns (path, detections)
49
+ detection_path, detections_list = result_pkg
50
 
51
  # Try to run depth estimation
52
  try:
 
54
  run_depth_inference,
55
  job.input_video_path,
56
  str(get_depth_output_path(job_id)),
57
+ detections_list, # Pass detections for overlay
58
  None, # max_frames
59
  job.depth_estimator_name,
60
  str(get_first_frame_depth_path(job_id)),