Zhen Ye Claude Opus 4.6 committed on
Commit
f09ca9c
·
1 Parent(s): b1974e2

fix: replace gated GSAM2 streaming with adaptive-rate publisher thread

Browse files

The old streaming approach (60-frame startup buffer + 20-frame safety
threshold + 3x frame duplication) prevented any frames from reaching
the frontend during processing. Replace with a dedicated publisher
thread that measures production rate during a 5s startup window, then
streams at 85% of measured rate with adaptive adjustments and heartbeat
keepalives. Reduce endpoint startup buffer from 30 to 5 frames.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. app.py +1 -1
  2. inference.py +124 -54
  3. jobs/streaming.py +1 -9
app.py CHANGED
@@ -775,7 +775,7 @@ async def stream_video(job_id: str):
775
  try:
776
  # Initial Buffer: Wait until we have enough frames or job is done
777
  if not buffered:
778
- if q.qsize() < 30:
779
  await asyncio.sleep(0.1)
780
  continue
781
  buffered = True
 
775
  try:
776
  # Initial Buffer: Wait until we have enough frames or job is done
777
  if not buffered:
778
+ if q.qsize() < 5:
779
  await asyncio.sleep(0.1)
780
  continue
781
  buffered = True
inference.py CHANGED
@@ -5,6 +5,7 @@
5
  # del os.environ["CUDA_VISIBLE_DEVICES"]
6
  import os
7
 
 
8
  import logging
9
  import time
10
  from threading import Event, RLock, Thread
@@ -1820,16 +1821,16 @@ def run_grounded_sam2_tracking(
1820
  except Exception as e:
1821
  logging.error("GSAM2 enrichment thread failed for frame %d: %s", frame_idx, e)
1822
 
 
 
 
 
 
1823
  def _writer_loop():
1824
  nonlocal render_done
1825
  next_idx = 0
1826
  buf: Dict[int, Tuple] = {}
1827
 
1828
- # Streaming constants
1829
- STARTUP_BUFFER = 60
1830
- SAFETY_THRESHOLD = 20
1831
- FRAME_DUP = 3
1832
-
1833
  # Per-track bbox history (replaces ByteTracker for GSAM2)
1834
  track_history: Dict[int, List] = {}
1835
  speed_est = SpeedEstimator(fps=fps) if enable_gpt else None
@@ -1845,28 +1846,7 @@ def run_grounded_sam2_tracking(
1845
  with StreamingVideoWriter(
1846
  output_video_path, fps, width, height
1847
  ) as writer:
1848
- # --- Phase 1: Startup buffering ---
1849
- playback_started = False
1850
- while not playback_started:
1851
- try:
1852
- idx, frm, fobjs = render_out.get(timeout=1.0)
1853
- buf[idx] = (frm, fobjs)
1854
- except Empty:
1855
- if not any(t.is_alive() for t in r_workers) and render_out.empty():
1856
- playback_started = True
1857
- break
1858
- continue
1859
-
1860
- ahead = sum(1 for k in buf if k >= next_idx)
1861
- if ahead >= STARTUP_BUFFER or ahead >= total_frames:
1862
- playback_started = True
1863
-
1864
- logging.info(
1865
- "Startup buffer filled (%d frames), beginning playback",
1866
- len(buf),
1867
- )
1868
-
1869
- # --- Phase 2: Write + stream with safety gating ---
1870
  while next_idx < total_frames:
1871
  try:
1872
  while next_idx not in buf:
@@ -1985,34 +1965,10 @@ def run_grounded_sam2_tracking(
1985
  if _perf_metrics is not None:
1986
  _perf_metrics["writer_total_ms"] += (time.perf_counter() - _t_w) * 1000.0
1987
 
1988
- # --- Streaming with buffer gating + frame duplication ---
1989
  if stream_queue or job_id:
1990
- # Drain any immediately available frames for accurate buffer level
1991
- while True:
1992
- try:
1993
- idx2, frm2, fobjs2 = render_out.get_nowait()
1994
- buf[idx2] = (frm2, fobjs2)
1995
- except Empty:
1996
- break
1997
-
1998
- buffer_ahead = sum(1 for k in buf if k > next_idx)
1999
-
2000
- if buffer_ahead >= SAFETY_THRESHOLD or next_idx >= total_frames - 1:
2001
- from jobs.streaming import publish_frame as _pub
2002
- if job_id:
2003
- for _ in range(FRAME_DUP):
2004
- _pub(job_id, frm)
2005
- else:
2006
- for _ in range(FRAME_DUP):
2007
- try:
2008
- stream_queue.put(frm, timeout=0.01)
2009
- except Exception:
2010
- pass
2011
- else:
2012
- logging.debug(
2013
- "Stream paused: buffer=%d < threshold=%d at frame %d",
2014
- buffer_ahead, SAFETY_THRESHOLD, next_idx,
2015
- )
2016
 
2017
  next_idx += 1
2018
  if next_idx % 30 == 0:
@@ -2032,6 +1988,7 @@ def run_grounded_sam2_tracking(
2032
  continue
2033
  finally:
2034
  render_done = True
 
2035
  # Shut down enrichment thread
2036
  if enrich_thread:
2037
  try:
@@ -2040,9 +1997,120 @@ def run_grounded_sam2_tracking(
2040
  except Exception:
2041
  logging.warning("GSAM2 enrichment thread shutdown timed out")
2042
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2043
  writer_thread = Thread(target=_writer_loop, daemon=True)
2044
  writer_thread.start()
2045
 
 
 
 
 
 
2046
  # ==================================================================
2047
  # Phase 1-4: Tracking (single-GPU fallback vs multi-GPU pipeline)
2048
  # Segments are fed incrementally to render_in as they complete.
@@ -2397,6 +2465,8 @@ def run_grounded_sam2_tracking(
2397
  for t in r_workers:
2398
  t.join()
2399
  writer_thread.join()
 
 
2400
 
2401
  if _perf_metrics is not None:
2402
  _perf_metrics["end_to_end_ms"] = (time.perf_counter() - _t_e2e) * 1000.0
 
5
  # del os.environ["CUDA_VISIBLE_DEVICES"]
6
  import os
7
 
8
+ import collections
9
  import logging
10
  import time
11
  from threading import Event, RLock, Thread
 
1821
  except Exception as e:
1822
  logging.error("GSAM2 enrichment thread failed for frame %d: %s", frame_idx, e)
1823
 
1824
+ # Shared streaming state (publisher ↔ writer)
1825
+ _stream_deque: collections.deque = collections.deque(maxlen=200)
1826
+ _stream_lock = RLock()
1827
+ _stream_writer_done = Event()
1828
+
1829
  def _writer_loop():
1830
  nonlocal render_done
1831
  next_idx = 0
1832
  buf: Dict[int, Tuple] = {}
1833
 
 
 
 
 
 
1834
  # Per-track bbox history (replaces ByteTracker for GSAM2)
1835
  track_history: Dict[int, List] = {}
1836
  speed_est = SpeedEstimator(fps=fps) if enable_gpt else None
 
1846
  with StreamingVideoWriter(
1847
  output_video_path, fps, width, height
1848
  ) as writer:
1849
+ # --- Write + stream (publisher handles pacing) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1850
  while next_idx < total_frames:
1851
  try:
1852
  while next_idx not in buf:
 
1965
  if _perf_metrics is not None:
1966
  _perf_metrics["writer_total_ms"] += (time.perf_counter() - _t_w) * 1000.0
1967
 
1968
+ # --- Deposit frame for stream publisher ---
1969
  if stream_queue or job_id:
1970
+ with _stream_lock:
1971
+ _stream_deque.append(frm)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1972
 
1973
  next_idx += 1
1974
  if next_idx % 30 == 0:
 
1988
  continue
1989
  finally:
1990
  render_done = True
1991
+ _stream_writer_done.set()
1992
  # Shut down enrichment thread
1993
  if enrich_thread:
1994
  try:
 
1997
  except Exception:
1998
  logging.warning("GSAM2 enrichment thread shutdown timed out")
1999
 
2000
+ def _stream_publisher_thread():
2001
+ """Adaptive-rate publisher: reads from _stream_deque, publishes at measured pace."""
2002
+ from jobs.streaming import publish_frame as _pub
2003
+
2004
+ STARTUP_WAIT = 5.0 # max seconds to accumulate before streaming
2005
+ MIN_FPS = 2.0
2006
+ MAX_FPS = 30.0
2007
+ HEARTBEAT_INTERVAL = 0.5 # re-publish last frame if deque empty
2008
+ LOW_WATER = 10
2009
+ HIGH_WATER = 50
2010
+ ADJUST_INTERVAL = 1.0
2011
+
2012
+ last_frame = None
2013
+ published = 0
2014
+
2015
+ # --- Phase 1: startup accumulation ---
2016
+ t_start = time.perf_counter()
2017
+ while True:
2018
+ elapsed = time.perf_counter() - t_start
2019
+ if elapsed >= STARTUP_WAIT:
2020
+ break
2021
+ if _stream_writer_done.is_set():
2022
+ break
2023
+ time.sleep(0.1)
2024
+
2025
+ with _stream_lock:
2026
+ accumulated = len(_stream_deque)
2027
+ elapsed = max(time.perf_counter() - t_start, 0.1)
2028
+ r_prod = accumulated / elapsed if accumulated > 0 else 10.0
2029
+ r_stream = max(MIN_FPS, min(MAX_FPS, 0.85 * r_prod))
2030
+
2031
+ logging.info(
2032
+ "Stream publisher started: R_prod=%.1f fps, R_stream=%.1f fps, "
2033
+ "accumulated=%d frames in %.1fs",
2034
+ r_prod, r_stream, accumulated, elapsed,
2035
+ )
2036
+
2037
+ # --- Phase 2: adaptive streaming ---
2038
+ last_adjust = time.perf_counter()
2039
+ last_publish_time = 0.0
2040
+
2041
+ while True:
2042
+ frame_interval = 1.0 / r_stream
2043
+
2044
+ # Try to pop a frame
2045
+ frame = None
2046
+ with _stream_lock:
2047
+ if _stream_deque:
2048
+ frame = _stream_deque.popleft()
2049
+
2050
+ if frame is not None:
2051
+ last_frame = frame
2052
+ if job_id:
2053
+ _pub(job_id, frame)
2054
+ elif stream_queue:
2055
+ try:
2056
+ stream_queue.put(frame, timeout=0.01)
2057
+ except Exception:
2058
+ pass
2059
+ published += 1
2060
+ last_publish_time = time.perf_counter()
2061
+ time.sleep(frame_interval)
2062
+ else:
2063
+ # Deque empty — check termination
2064
+ if _stream_writer_done.is_set():
2065
+ with _stream_lock:
2066
+ if not _stream_deque:
2067
+ break
2068
+ continue
2069
+
2070
+ # Heartbeat: re-publish last frame to keep MJPEG alive
2071
+ now = time.perf_counter()
2072
+ if last_frame is not None and (now - last_publish_time) >= HEARTBEAT_INTERVAL:
2073
+ if job_id:
2074
+ _pub(job_id, last_frame)
2075
+ elif stream_queue:
2076
+ try:
2077
+ stream_queue.put(last_frame, timeout=0.01)
2078
+ except Exception:
2079
+ pass
2080
+ last_publish_time = now
2081
+ time.sleep(0.05)
2082
+
2083
+ # Adaptive rate adjustment (every ~1s)
2084
+ now = time.perf_counter()
2085
+ if now - last_adjust >= ADJUST_INTERVAL:
2086
+ with _stream_lock:
2087
+ level = len(_stream_deque)
2088
+ if level < LOW_WATER:
2089
+ r_stream = max(MIN_FPS, r_stream * 0.9)
2090
+ elif level > HIGH_WATER:
2091
+ r_stream = min(MAX_FPS, r_stream * 1.05)
2092
+ last_adjust = now
2093
+
2094
+ # Publish final frame
2095
+ if last_frame is not None:
2096
+ if job_id:
2097
+ _pub(job_id, last_frame)
2098
+ elif stream_queue:
2099
+ try:
2100
+ stream_queue.put(last_frame, timeout=0.01)
2101
+ except Exception:
2102
+ pass
2103
+
2104
+ logging.info("Stream publisher finished: published %d frames", published)
2105
+
2106
  writer_thread = Thread(target=_writer_loop, daemon=True)
2107
  writer_thread.start()
2108
 
2109
+ _publisher_thread = None
2110
+ if stream_queue or job_id:
2111
+ _publisher_thread = Thread(target=_stream_publisher_thread, daemon=True)
2112
+ _publisher_thread.start()
2113
+
2114
  # ==================================================================
2115
  # Phase 1-4: Tracking (single-GPU fallback vs multi-GPU pipeline)
2116
  # Segments are fed incrementally to render_in as they complete.
 
2465
  for t in r_workers:
2466
  t.join()
2467
  writer_thread.join()
2468
+ if _publisher_thread is not None:
2469
+ _publisher_thread.join(timeout=15)
2470
 
2471
  if _perf_metrics is not None:
2472
  _perf_metrics["end_to_end_ms"] = (time.perf_counter() - _t_e2e) * 1000.0
jobs/streaming.py CHANGED
@@ -27,7 +27,7 @@ def _resize_for_stream(frame: np.ndarray) -> np.ndarray:
27
  def create_stream(job_id: str) -> queue.Queue:
28
  """Create a new stream queue for a job. Returns the queue for backward compat."""
29
  with _LOCK:
30
- q = queue.Queue(maxsize=60)
31
  # Create event — will be lazily bound to the correct event loop
32
  event = None # Lazily created in get_stream_event
33
  _STREAMS[job_id] = (q, event)
@@ -90,11 +90,3 @@ def publish_frame(job_id: str, frame: Any) -> None:
90
  # Wake the async consumer if waiting
91
  if event is not None:
92
  event.set()
93
-
94
-
95
- def publish_frame_to_queue(q: queue.Queue, frame: Any) -> None:
96
- """Publish to a specific queue object. Non-blocking drop."""
97
- try:
98
- q.put_nowait(frame)
99
- except queue.Full:
100
- pass
 
27
  def create_stream(job_id: str) -> queue.Queue:
28
  """Create a new stream queue for a job. Returns the queue for backward compat."""
29
  with _LOCK:
30
+ q = queue.Queue(maxsize=120)
31
  # Create event — will be lazily bound to the correct event loop
32
  event = None # Lazily created in get_stream_event
33
  _STREAMS[job_id] = (q, event)
 
90
  # Wake the async consumer if waiting
91
  if event is not None:
92
  event.set()