Spaces:

BiasLab2025
/

perception

Paused

Zhen Ye committed 23 days ago

Commit

356dce8

1 Parent(s): 2392266

feat: Implement AsyncVideoReader for parallel video decoding

Addresses CPU bottleneck during inference by moving video decoding to a
background thread with a prefetch buffer. Integrated into inference pipeline.

Files changed (2) hide show

inference.py +8 -8
utils/video.py +88 -0

inference.py CHANGED Viewed

@@ -23,7 +23,7 @@ from models.model_loader import load_detector, load_detector_on_device
 from models.segmenters.model_loader import load_segmenter, load_segmenter_on_device
 from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
 from models.depth_estimators.base import DepthEstimator
-from utils.video import extract_frames, write_video, VideoReader, VideoWriter
 from utils.gpt_distance import estimate_distance_gpt
 import tempfile
@@ -591,7 +591,7 @@ def run_inference(
     # 1. Setup Video Reader
     try:
-        reader = VideoReader(input_video_path)
     except ValueError:
         logging.exception("Failed to open video at %s", input_video_path)
         raise
@@ -688,7 +688,7 @@ def run_inference(
         try:
              # Quick reader scan
-             reader_scan = VideoReader(input_video_path)
              scan_frames = []
              for i, frame in enumerate(reader_scan):
                  if i in target_indices:
@@ -965,7 +965,7 @@ def run_segmentation(
 ) -> str:
     # 1. Setup Reader
     try:
-        reader = VideoReader(input_video_path)
     except ValueError:
         logging.exception("Failed to open video at %s", input_video_path)
         raise
@@ -1128,7 +1128,7 @@ def run_segmentation(
     # Feeder
     try:
-        reader = VideoReader(input_video_path)
         for i, frame in enumerate(reader):
             _check_cancellation(job_id)
             if max_frames is not None and i >= max_frames:
@@ -1161,7 +1161,7 @@ def run_depth_inference(
 ) -> str:
     # 1. Setup Reader
     try:
-        reader = VideoReader(input_video_path)
     except ValueError:
         logging.exception("Failed to open video at %s", input_video_path)
         raise
@@ -1264,7 +1264,7 @@ def run_depth_inference(
         # We will iterate and pick.
         cnt = 0
-        reader_scan = VideoReader(input_video_path)
         for i, frame in enumerate(reader_scan):
             if i in target_indices:
                scan_frames_data.append(frame)
@@ -1440,7 +1440,7 @@ def run_depth_inference(
     # Feeder
     try:
-        reader = VideoReader(input_video_path)
         for i, frame in enumerate(reader):
             _check_cancellation(job_id)
             if max_frames is not None and i >= max_frames:

 from models.segmenters.model_loader import load_segmenter, load_segmenter_on_device
 from models.depth_estimators.model_loader import load_depth_estimator, load_depth_estimator_on_device
 from models.depth_estimators.base import DepthEstimator
+from utils.video import extract_frames, write_video, VideoReader, VideoWriter, AsyncVideoReader
 from utils.gpt_distance import estimate_distance_gpt
 import tempfile
     # 1. Setup Video Reader
     try:
+        reader = AsyncVideoReader(input_video_path)
     except ValueError:
         logging.exception("Failed to open video at %s", input_video_path)
         raise
         try:
              # Quick reader scan
+             reader_scan = AsyncVideoReader(input_video_path)
              scan_frames = []
              for i, frame in enumerate(reader_scan):
                  if i in target_indices:
 ) -> str:
     # 1. Setup Reader
     try:
+        reader = AsyncVideoReader(input_video_path)
     except ValueError:
         logging.exception("Failed to open video at %s", input_video_path)
         raise
     # Feeder
     try:
+        # reader = VideoReader(input_video_path) # Reusing existing reader
         for i, frame in enumerate(reader):
             _check_cancellation(job_id)
             if max_frames is not None and i >= max_frames:
 ) -> str:
     # 1. Setup Reader
     try:
+        reader = AsyncVideoReader(input_video_path)
     except ValueError:
         logging.exception("Failed to open video at %s", input_video_path)
         raise
         # We will iterate and pick.
         cnt = 0
+        reader_scan = AsyncVideoReader(input_video_path)
         for i, frame in enumerate(reader_scan):
             if i in target_indices:
                scan_frames_data.append(frame)
     # Feeder
     try:
+        # reader = VideoReader(input_video_path) # Reusing existing reader
         for i, frame in enumerate(reader):
             _check_cancellation(job_id)
             if max_frames is not None and i >= max_frames:

utils/video.py CHANGED Viewed

@@ -114,6 +114,94 @@ class VideoReader:
         self.close()
 class VideoWriter:
     def __init__(self, output_path: str, fps: float, width: int, height: int):
         self.output_path = output_path

         self.close()
+class AsyncVideoReader:
+    """
+    Async video reader that decodes frames in a background thread.
+    This prevents GPU starvation on multi-GPU systems by prefetching frames
+    while the main thread is busy dispatching work to GPUs.
+    """
+    def __init__(self, video_path: str, prefetch_size: int = 32):
+        """
+        Initialize async video reader.
+        Args:
+            video_path: Path to video file
+            prefetch_size: Number of frames to prefetch (default 32)
+        """
+        from queue import Queue
+        from threading import Thread
+        self.video_path = video_path
+        self.prefetch_size = prefetch_size
+        # Open video to get metadata
+        self._cap = cv2.VideoCapture(video_path)
+        if not self._cap.isOpened():
+            raise ValueError(f"Unable to open video: {video_path}")
+        self.fps = self._cap.get(cv2.CAP_PROP_FPS) or 30.0
+        self.width = int(self._cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        self.height = int(self._cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        self.total_frames = int(self._cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        # Prefetch queue
+        self._queue: Queue = Queue(maxsize=prefetch_size)
+        self._error: Exception = None
+        self._finished = False
+        # Start decoder thread
+        self._thread = Thread(target=self._decode_loop, daemon=True)
+        self._thread.start()
+    def _decode_loop(self):
+        """Background thread that continuously decodes frames."""
+        try:
+            while True:
+                success, frame = self._cap.read()
+                if not success:
+                    break
+                self._queue.put(frame)  # Blocks when queue is full (backpressure)
+        except Exception as e:
+            self._error = e
+            logging.error(f"AsyncVideoReader decode error: {e}")
+        finally:
+            self._cap.release()
+            self._queue.put(None)  # Sentinel to signal end
+            self._finished = True
+    def __iter__(self):
+        return self
+    def __next__(self) -> np.ndarray:
+        if self._error:
+            raise self._error
+        frame = self._queue.get()
+        if frame is None:
+            raise StopIteration
+        return frame
+    def close(self):
+        """Stop the decoder thread and release resources."""
+        # Signal thread to stop by releasing cap (if not already done)
+        if self._cap.isOpened():
+            self._cap.release()
+        # Drain queue to unblock thread if it's waiting on put()
+        while not self._queue.empty():
+            try:
+                self._queue.get_nowait()
+            except:
+                break
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
 class VideoWriter:
     def __init__(self, output_path: str, fps: float, width: int, height: int):
         self.output_path = output_path