Zhen Ye committed on
Commit
dbfb5c9
·
1 Parent(s): 6375955

Event-driven MJPEG streaming, GPT concurrency semaphore, pre-resize frames in publisher

Browse files
Files changed (2) hide show
  1. app.py +44 -62
  2. jobs/streaming.py +71 -17
app.py CHANGED
@@ -45,7 +45,7 @@ from inference import process_first_frame, run_inference, run_segmentation
45
  from models.depth_estimators.model_loader import list_depth_estimators
46
  from jobs.background import process_video_async
47
  from jobs.models import JobInfo, JobStatus
48
- from jobs.streaming import get_stream
49
  from jobs.storage import (
50
  get_depth_output_path,
51
  get_first_frame_depth_path,
@@ -66,6 +66,9 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
66
  logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
67
  logging.getLogger("transformers").setLevel(logging.WARNING)
68
 
 
 
 
69
 
70
 
71
  async def _periodic_cleanup() -> None:
@@ -405,33 +408,17 @@ async def detect_async_endpoint(
405
  active_depth = depth_estimator if enable_depth else None
406
 
407
  try:
408
- processed_frame, detections, depth_map, first_frame_gpt_results = process_first_frame(
409
  str(input_path),
410
  query_list,
411
  mode=mode,
412
  detector_name=detector_name,
413
  segmenter_name=segmenter,
414
- depth_estimator_name=active_depth,
415
- depth_scale=depth_scale,
416
- enable_depth_estimator=enable_depth,
417
- enable_gpt=enable_gpt,
418
- mission_spec=mission_spec,
419
  )
420
  cv2.imwrite(str(first_frame_path), processed_frame)
421
-
422
- if depth_map is not None:
423
- # Simple visualization: Normalize and apply colormap
424
- try:
425
- d_min, d_max = np.min(depth_map), np.max(depth_map)
426
- if d_max - d_min > 1e-6:
427
- d_norm = (depth_map - d_min) / (d_max - d_min)
428
- else:
429
- d_norm = np.zeros_like(depth_map)
430
- d_uint8 = (d_norm * 255).astype(np.uint8)
431
- d_color = cv2.applyColorMap(d_uint8, cv2.COLORMAP_INFERNO)
432
- cv2.imwrite(str(first_frame_depth_path), d_color)
433
- except Exception as e:
434
- logging.warning(f"Failed to save depth map: {e}")
435
  except Exception:
436
  logging.exception("First-frame processing failed.")
437
  shutil.rmtree(job_dir, ignore_errors=True)
@@ -632,70 +619,64 @@ async def detect_first_frame_depth(job_id: str):
632
 
633
  @app.get("/detect/stream/{job_id}")
634
  async def stream_video(job_id: str):
635
- """MJPEG stream of the processing video (optimized)."""
636
- import queue
637
-
638
  async def stream_generator():
639
  loop = asyncio.get_running_loop()
640
  buffered = False
641
-
 
 
 
642
  while True:
643
  q = get_stream(job_id)
644
  if not q:
645
  break
646
-
647
  try:
648
  # Initial Buffer: Wait until we have enough frames or job is done
649
  if not buffered:
650
  if q.qsize() < 30:
651
- # If queue is empty, wait a bit
652
  await asyncio.sleep(0.1)
653
- # Check if job is still running? For now just wait for buffer or stream close
654
  continue
655
  buffered = True
656
 
657
- # Get ONE frame (no skipping)
658
- # Use wait to allow generator to yield cleanly
659
- try:
660
- # Blocking get in executor to avoid hanging async loop?
661
- # Actually standard queue.get() is blocking. get_nowait is not.
662
- # We can sleep-poll for async compatibility
663
- while q.empty():
664
- await asyncio.sleep(0.01)
665
- if not get_stream(job_id): # Stream closed
666
  return
 
 
 
 
667
 
 
 
668
  frame = q.get_nowait()
669
- except queue.Empty:
670
  continue
671
-
672
- # Resize if too big (e.g. > 640 width)
673
- # Optimization: Only resize if needed
674
- h, w = frame.shape[:2]
675
- if w > 640:
676
- scale = 640 / w
677
- new_h = int(h * scale)
678
- frame = cv2.resize(frame, (640, new_h), interpolation=cv2.INTER_NEAREST)
679
-
680
- # Encode in thread
681
- # JPEG Quality = 60 (Better quality for smooth video)
682
  encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 60]
683
  success, buffer = await loop.run_in_executor(None, cv2.imencode, '.jpg', frame, encode_param)
684
-
685
  if success:
686
  yield (b'--frame\r\n'
687
  b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
688
-
689
- # Control playback speed?
690
- # If we blast frames as fast as possible, it might play accelerated.
691
- # Ideally we want to sync to ~30fps.
692
- await asyncio.sleep(0.033) # Simple pacer (~30fps)
693
 
694
  except Exception:
695
  await asyncio.sleep(0.1)
696
 
697
  return StreamingResponse(
698
- stream_generator(),
699
  media_type="multipart/x-mixed-replace; boundary=frame"
700
  )
701
 
@@ -725,8 +706,8 @@ async def reason_track(
725
  # This is blocking, but that's expected for this endpoint structure.
726
  # For high concurrency, might want to offload to threadpool or async wrapper.
727
  try:
728
- # estimate_threat_gpt reads the file from disk
729
- results = await asyncio.to_thread(estimate_threat_gpt, input_path, track_list)
730
  logging.info(f"GPT Output for Video Track Update:\n{results}")
731
  except Exception as e:
732
  logging.exception("GPT reasoning failed")
@@ -775,11 +756,12 @@ async def chat_threat_endpoint(
775
  except json_module.JSONDecodeError:
776
  pass # Non-critical, proceed without mission context
777
 
778
- # Run chat in thread to avoid blocking
779
  try:
780
- response = await asyncio.to_thread(
781
- chat_about_threats, question, detection_list, mission_spec_dict
782
- )
 
783
  return {"response": response}
784
  except Exception as e:
785
  logging.exception("Threat chat failed")
 
45
  from models.depth_estimators.model_loader import list_depth_estimators
46
  from jobs.background import process_video_async
47
  from jobs.models import JobInfo, JobStatus
48
+ from jobs.streaming import get_stream, get_stream_event
49
  from jobs.storage import (
50
  get_depth_output_path,
51
  get_first_frame_depth_path,
 
66
  logging.getLogger("huggingface_hub").setLevel(logging.WARNING)
67
  logging.getLogger("transformers").setLevel(logging.WARNING)
68
 
69
+ # GPT concurrency limiter — prevents thread exhaustion under load
70
+ _GPT_SEMAPHORE = asyncio.Semaphore(int(os.environ.get("GPT_CONCURRENCY_LIMIT", "4")))
71
+
72
 
73
 
74
  async def _periodic_cleanup() -> None:
 
408
  active_depth = depth_estimator if enable_depth else None
409
 
410
  try:
411
+ processed_frame, detections = process_first_frame(
412
  str(input_path),
413
  query_list,
414
  mode=mode,
415
  detector_name=detector_name,
416
  segmenter_name=segmenter,
 
 
 
 
 
417
  )
418
  cv2.imwrite(str(first_frame_path), processed_frame)
419
+ # GPT and depth are now handled in the async pipeline (enrichment thread)
420
+ depth_map = None
421
+ first_frame_gpt_results = None
 
 
 
 
 
 
 
 
 
 
 
422
  except Exception:
423
  logging.exception("First-frame processing failed.")
424
  shutil.rmtree(job_dir, ignore_errors=True)
 
619
 
620
  @app.get("/detect/stream/{job_id}")
621
  async def stream_video(job_id: str):
622
+ """MJPEG stream of the processing video (event-driven)."""
623
+ import queue as queue_mod
624
+
625
  async def stream_generator():
626
  loop = asyncio.get_running_loop()
627
  buffered = False
628
+
629
+ # Get or create the asyncio.Event for this stream (must be in async context)
630
+ event = get_stream_event(job_id)
631
+
632
  while True:
633
  q = get_stream(job_id)
634
  if not q:
635
  break
636
+
637
  try:
638
  # Initial Buffer: Wait until we have enough frames or job is done
639
  if not buffered:
640
  if q.qsize() < 30:
 
641
  await asyncio.sleep(0.1)
 
642
  continue
643
  buffered = True
644
 
645
+ # Event-driven wait replaces busy-wait polling
646
+ if event is not None:
647
+ try:
648
+ await asyncio.wait_for(event.wait(), timeout=1.0)
649
+ event.clear()
650
+ except asyncio.TimeoutError:
651
+ if not get_stream(job_id):
 
 
652
  return
653
+ continue
654
+ else:
655
+ # Fallback if no event (shouldn't happen)
656
+ await asyncio.sleep(0.033)
657
 
658
+ # Drain available frame (already pre-resized by publish_frame)
659
+ try:
660
  frame = q.get_nowait()
661
+ except queue_mod.Empty:
662
  continue
663
+
664
+ # Encode in thread (frame already resized by publish_frame)
 
 
 
 
 
 
 
 
 
665
  encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 60]
666
  success, buffer = await loop.run_in_executor(None, cv2.imencode, '.jpg', frame, encode_param)
667
+
668
  if success:
669
  yield (b'--frame\r\n'
670
  b'Content-Type: image/jpeg\r\n\r\n' + buffer.tobytes() + b'\r\n')
671
+
672
+ # Simple pacer (~30fps)
673
+ await asyncio.sleep(0.033)
 
 
674
 
675
  except Exception:
676
  await asyncio.sleep(0.1)
677
 
678
  return StreamingResponse(
679
+ stream_generator(),
680
  media_type="multipart/x-mixed-replace; boundary=frame"
681
  )
682
 
 
706
  # This is blocking, but that's expected for this endpoint structure.
707
  # For high concurrency, might want to offload to threadpool or async wrapper.
708
  try:
709
+ async with _GPT_SEMAPHORE:
710
+ results = await asyncio.to_thread(estimate_threat_gpt, input_path, track_list)
711
  logging.info(f"GPT Output for Video Track Update:\n{results}")
712
  except Exception as e:
713
  logging.exception("GPT reasoning failed")
 
756
  except json_module.JSONDecodeError:
757
  pass # Non-critical, proceed without mission context
758
 
759
+ # Run chat in thread to avoid blocking (with concurrency limit)
760
  try:
761
+ async with _GPT_SEMAPHORE:
762
+ response = await asyncio.to_thread(
763
+ chat_about_threats, question, detection_list, mission_spec_dict
764
+ )
765
  return {"response": response}
766
  except Exception as e:
767
  logging.exception("Threat chat failed")
jobs/streaming.py CHANGED
@@ -1,26 +1,63 @@
 
1
  import queue
2
- from typing import Dict, Optional, Any
3
  from threading import Lock
4
 
 
 
 
5
  # Global registry of active streams
6
- # Key: job_id -> Queue[frame_data]
7
- _STREAMS: Dict[str, queue.Queue] = {}
8
  _LOCK = Lock()
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  def create_stream(job_id: str) -> queue.Queue:
11
- """Create a new stream queue for a job."""
12
  with _LOCK:
13
- # standard Queue, thread-safe
14
- # maxsize to prevent memory explosion if consumer is slow
15
- # Buffer increased to 300 (approx 10s at 30fps) for smooth streaming
16
  q = queue.Queue(maxsize=60)
17
- _STREAMS[job_id] = q
 
 
18
  return q
19
 
 
20
  def get_stream(job_id: str) -> Optional[queue.Queue]:
21
  """Get the stream queue for a job."""
22
  with _LOCK:
23
- return _STREAMS.get(job_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def remove_stream(job_id: str) -> None:
26
  """Remove a stream queue."""
@@ -28,15 +65,32 @@ def remove_stream(job_id: str) -> None:
28
  if job_id in _STREAMS:
29
  del _STREAMS[job_id]
30
 
 
31
  def publish_frame(job_id: str, frame: Any) -> None:
32
- """Publish a frame to a specific job's stream. Non-blocking drop if full."""
33
- q = get_stream(job_id)
34
- if q:
35
- try:
36
- q.put_nowait(frame)
37
- except queue.Full:
38
- # Drop frame if consumer is too slow
39
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  def publish_frame_to_queue(q: queue.Queue, frame: Any) -> None:
42
  """Publish to a specific queue object. Non-blocking drop."""
 
1
+ import asyncio
2
  import queue
3
+ from typing import Dict, Optional, Any, Tuple
4
  from threading import Lock
5
 
6
+ import cv2
7
+ import numpy as np
8
+
9
  # Global registry of active streams
10
+ # Key: job_id -> (Queue[frame_data], asyncio.Event)
11
+ _STREAMS: Dict[str, Tuple[queue.Queue, asyncio.Event]] = {}
12
  _LOCK = Lock()
13
 
14
+ _STREAM_MAX_WIDTH = 640
15
+
16
+
17
+ def _resize_for_stream(frame: np.ndarray) -> np.ndarray:
18
+ """Resize frame for streaming (cap at 640px width). Pre-resizes in publisher thread."""
19
+ h, w = frame.shape[:2]
20
+ if w > _STREAM_MAX_WIDTH:
21
+ scale = _STREAM_MAX_WIDTH / w
22
+ new_h = int(h * scale)
23
+ return cv2.resize(frame, (_STREAM_MAX_WIDTH, new_h), interpolation=cv2.INTER_NEAREST)
24
+ return frame
25
+
26
+
27
  def create_stream(job_id: str) -> queue.Queue:
28
+ """Create a new stream queue for a job. Returns the queue for backward compat."""
29
  with _LOCK:
 
 
 
30
  q = queue.Queue(maxsize=60)
31
+ # Create event — will be lazily bound to the correct event loop
32
+ event = None # Lazily created in get_stream_event
33
+ _STREAMS[job_id] = (q, event)
34
  return q
35
 
36
+
37
  def get_stream(job_id: str) -> Optional[queue.Queue]:
38
  """Get the stream queue for a job."""
39
  with _LOCK:
40
+ entry = _STREAMS.get(job_id)
41
+ if entry:
42
+ return entry[0]
43
+ return None
44
+
45
+
46
+ def get_stream_event(job_id: str) -> Optional[asyncio.Event]:
47
+ """Get or create the asyncio.Event for a job's stream.
48
+
49
+ Must be called from the async event loop that will await the event.
50
+ """
51
+ with _LOCK:
52
+ entry = _STREAMS.get(job_id)
53
+ if not entry:
54
+ return None
55
+ q, event = entry
56
+ if event is None:
57
+ event = asyncio.Event()
58
+ _STREAMS[job_id] = (q, event)
59
+ return event
60
+
61
 
62
  def remove_stream(job_id: str) -> None:
63
  """Remove a stream queue."""
 
65
  if job_id in _STREAMS:
66
  del _STREAMS[job_id]
67
 
68
+
69
  def publish_frame(job_id: str, frame: Any) -> None:
70
+ """Publish a pre-resized frame to a job's stream. Non-blocking drop if full.
71
+
72
+ Also sets the asyncio.Event to wake the stream consumer immediately.
73
+ """
74
+ with _LOCK:
75
+ entry = _STREAMS.get(job_id)
76
+ if not entry:
77
+ return
78
+
79
+ q, event = entry
80
+
81
+ # Pre-resize for streaming (avoids resize in async handler)
82
+ resized = _resize_for_stream(frame)
83
+
84
+ try:
85
+ q.put_nowait(resized)
86
+ except queue.Full:
87
+ # Drop frame if consumer is too slow
88
+ pass
89
+
90
+ # Wake the async consumer if waiting
91
+ if event is not None:
92
+ event.set()
93
+
94
 
95
  def publish_frame_to_queue(q: queue.Queue, frame: Any) -> None:
96
  """Publish to a specific queue object. Non-blocking drop."""