MacBook pro committed on
Commit
9457f1e
·
1 Parent(s): 5d8fdc1

feat(webrtc): hybrid inline-if-idle processing, latency & queue metrics, pipeline stats endpoint

Browse files
models/_logs/download_audit.jsonl ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ {"ts": "2025-09-25T00:33:19Z", "event": "start", "tag": "downloader"}
2
+ {"ts": "2025-09-25T00:35:16Z", "event": "download_ok", "tag": "downloader", "model": "inswapper", "path": "/Users/macbookpro/Desktop/mirage/models/inswapper/inswapper_128_fp16.onnx"}
requirements_local.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ aiortc==1.6.0
4
+ websockets==11.0.3
5
+ numpy==1.24.4
6
+ opencv-python==4.8.1.78
7
+ Pillow==10.0.1
8
+ insightface==0.7.3
9
+ basicsr==1.4.2
10
+ timm==0.9.12
11
+ python-multipart==0.0.9
12
+ av==11.0.0
13
+ psutil==5.9.8
14
+ huggingface-hub==0.24.5
15
+ onnx==1.16.1
16
+ # NOTE: the GPU build of ONNX Runtime (onnxruntime-gpu) is required for CUDAExecutionProvider on A10G,
+ # but is intentionally not pinned in this file — install it separately for the target CUDA version.
17
+ torch==2.1.2
18
+ facexlib==0.3.0
swap_pipeline.py CHANGED
@@ -82,6 +82,20 @@ class FaceSwapPipeline:
82
  self.low_brightness_threshold = float(os.getenv('MIRAGE_LOW_BRIGHTNESS_THRESH', '40'))
83
  # Similarity threshold for logging (cosine similarity typical range [-1,1])
84
  self.similarity_warn_threshold = float(os.getenv('MIRAGE_SIMILARITY_WARN', '0.15'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  def initialize(self):
87
  if self.initialized:
@@ -309,6 +323,7 @@ class FaceSwapPipeline:
309
  return pcm_bytes
310
 
311
  def process_frame(self, frame: np.ndarray) -> np.ndarray:
 
312
  if not self.initialized or self.swapper is None or self.app is None:
313
  self._stats['early_uninitialized'] += 1
314
  if self.swap_debug:
@@ -336,9 +351,27 @@ class FaceSwapPipeline:
336
  logger.debug(f'Applied brightness compensation gain={gain:.2f} (brightness={brightness:.1f})')
337
  except Exception:
338
  pass
339
- faces = self.app.get(frame)
 
 
 
 
 
 
 
 
 
 
 
340
  self._last_faces_cache = faces
341
  if not faces:
 
 
 
 
 
 
 
342
  if self.swap_debug:
343
  logger.debug('process_frame: no faces detected in incoming frame')
344
  self._record_latency(time.time() - t0)
@@ -378,11 +411,36 @@ class FaceSwapPipeline:
378
  logger.debug(f'Low similarity primary face sim={sim:.3f}')
379
  except Exception:
380
  pass
381
- out = self.swapper.get(out, f, self.source_face, paste_back=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  count += 1
383
  except Exception as e:
384
  logger.debug(f"Swap failed for face: {e}")
385
  self._stats['total_faces_swapped'] += count
 
 
 
 
386
  # Optional debug overlay for visual confirmation
387
  if count > 0 and os.getenv('MIRAGE_DEBUG_OVERLAY', '0').lower() in ('1','true','yes','on'):
388
  try:
@@ -433,6 +491,11 @@ class FaceSwapPipeline:
433
  self._stats['swap_faces_last'] = count
434
  self._stats['frames'] += 1
435
  self._frame_index += 1
 
 
 
 
 
436
  return out
437
 
438
  def _record_latency(self, dt: float):
@@ -455,6 +518,8 @@ class FaceSwapPipeline:
455
  codeformer_avg_latency_ms=cf_avg,
456
  max_faces=self.max_faces,
457
  debug_overlay=os.getenv('MIRAGE_DEBUG_OVERLAY', '0'),
 
 
458
  )
459
  # Provider diagnostics (best-effort)
460
  try: # pragma: no cover
 
82
  self.low_brightness_threshold = float(os.getenv('MIRAGE_LOW_BRIGHTNESS_THRESH', '40'))
83
  # Similarity threshold for logging (cosine similarity typical range [-1,1])
84
  self.similarity_warn_threshold = float(os.getenv('MIRAGE_SIMILARITY_WARN', '0.15'))
85
+ # Temporal reuse configuration
86
+ self.face_cache_ttl = int(os.getenv('MIRAGE_FACE_CACHE_TTL', '5') or '5') # frames
87
+ self._cached_face = None
88
+ self._cached_face_age = 0
89
+ # Aggressive blend toggle for visibility
90
+ self.aggressive_blend = os.getenv('MIRAGE_AGGRESSIVE_BLEND', '0').lower() in ('1','true','yes','on')
91
+ # Optional face ROI upscaling for tiny faces
92
+ self.face_min_size = int(os.getenv('MIRAGE_FACE_MIN_SIZE', '80') or '80')
93
+ self.face_upscale_factor = float(os.getenv('MIRAGE_FACE_UPSCALE', '1.6'))
94
+ # Detector preprocessing (CLAHE) low light
95
+ self.det_clahe = os.getenv('MIRAGE_DET_CLAHE', '1').lower() in ('1','true','yes','on')
96
+ # End-to-end latency markers
97
+ self._last_e2e_ms = None
98
+ self._e2e_hist: List[float] = []
99
 
100
  def initialize(self):
101
  if self.initialized:
 
323
  return pcm_bytes
324
 
325
  def process_frame(self, frame: np.ndarray) -> np.ndarray:
326
+ frame_in_ts = time.time()
327
  if not self.initialized or self.swapper is None or self.app is None:
328
  self._stats['early_uninitialized'] += 1
329
  if self.swap_debug:
 
351
  logger.debug(f'Applied brightness compensation gain={gain:.2f} (brightness={brightness:.1f})')
352
  except Exception:
353
  pass
354
+ # Detector preprocessing path for improved low-light detect
355
+ det_input = frame
356
+ if self.det_clahe:
357
+ try:
358
+ gray_det = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
359
+ if float(np.mean(gray_det)) < (self.low_brightness_threshold + 15):
360
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
361
+ eq = clahe.apply(gray_det)
362
+ det_input = cv2.cvtColor(eq, cv2.COLOR_GRAY2BGR)
363
+ except Exception:
364
+ pass
365
+ faces = self.app.get(det_input)
366
  self._last_faces_cache = faces
367
  if not faces:
368
+ # Attempt temporal reuse of last successful face if within ttl
369
+ if self._cached_face is not None and self._cached_face_age < self.face_cache_ttl:
370
+ faces = [self._cached_face]
371
+ self._cached_face_age += 1
372
+ else:
373
+ self._cached_face = None
374
+ self._cached_face_age = 0
375
  if self.swap_debug:
376
  logger.debug('process_frame: no faces detected in incoming frame')
377
  self._record_latency(time.time() - t0)
 
411
  logger.debug(f'Low similarity primary face sim={sim:.3f}')
412
  except Exception:
413
  pass
414
+ # Upscale small face region before swapping to reduce warping artifacts
415
+ try:
416
+ x1,y1,x2,y2 = f.bbox.astype(int)
417
+ fh = y2 - y1; fw = x2 - x1
418
+ if min(fh, fw) < self.face_min_size:
419
+ # Extract padded ROI, upscale, run swapper, then downscale
420
+ pad = int(0.15 * max(fh, fw))
421
+ h, w = out.shape[:2]
422
+ rx1 = max(0, x1 - pad); ry1 = max(0, y1 - pad)
423
+ rx2 = min(w, x2 + pad); ry2 = min(h, y2 + pad)
424
+ roi = out[ry1:ry2, rx1:rx2]
425
+ if roi.size > 0:
426
+ big = cv2.resize(roi, None, fx=self.face_upscale_factor, fy=self.face_upscale_factor, interpolation=cv2.INTER_CUBIC)
427
+ swapped_big = self.swapper.get(big, f, self.source_face, paste_back=False)
428
+ swapped_small = cv2.resize(swapped_big, (rx2-rx1, ry2-ry1), interpolation=cv2.INTER_LINEAR)
429
+ out[ry1:ry2, rx1:rx2] = swapped_small
430
+ else:
431
+ out = self.swapper.get(out, f, self.source_face, paste_back=True)
432
+ else:
433
+ out = self.swapper.get(out, f, self.source_face, paste_back=True)
434
+ except Exception:
435
+ out = self.swapper.get(out, f, self.source_face, paste_back=True)
436
  count += 1
437
  except Exception as e:
438
  logger.debug(f"Swap failed for face: {e}")
439
  self._stats['total_faces_swapped'] += count
440
+ # Cache first face for reuse
441
+ if faces:
442
+ self._cached_face = faces[0]
443
+ self._cached_face_age = 0
444
  # Optional debug overlay for visual confirmation
445
  if count > 0 and os.getenv('MIRAGE_DEBUG_OVERLAY', '0').lower() in ('1','true','yes','on'):
446
  try:
 
491
  self._stats['swap_faces_last'] = count
492
  self._stats['frames'] += 1
493
  self._frame_index += 1
494
+ # End-to-end latency including pre-detection + swap path
495
+ self._last_e2e_ms = (time.time() - frame_in_ts) * 1000.0
496
+ self._e2e_hist.append(self._last_e2e_ms)
497
+ if len(self._e2e_hist) > 200:
498
+ self._e2e_hist.pop(0)
499
  return out
500
 
501
  def _record_latency(self, dt: float):
 
518
  codeformer_avg_latency_ms=cf_avg,
519
  max_faces=self.max_faces,
520
  debug_overlay=os.getenv('MIRAGE_DEBUG_OVERLAY', '0'),
521
+ e2e_latency_ms=self._last_e2e_ms,
522
+ e2e_latency_avg_ms=(float(np.mean(self._e2e_hist)) if self._e2e_hist else None),
523
  )
524
  # Provider diagnostics (best-effort)
525
  try: # pragma: no cover
webrtc_server.py CHANGED
@@ -375,15 +375,33 @@ class IncomingVideoTrack(MediaStreamTrack):
375
  self._last_processed: Optional[np.ndarray] = None
376
  self._processing_task: Optional[asyncio.Task] = None
377
  self._lock = asyncio.Lock()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
 
379
  async def recv(self): # type: ignore[override]
380
  frame = await self.track.recv()
381
  self.frame_id += 1
 
 
 
382
  # Convert to numpy BGR for pipeline
383
  img = frame.to_ndarray(format="bgr24")
384
  h, w, _ = img.shape
385
  proc_input = img
386
- # Optionally downscale for processing to cap latency (configurable)
387
  try:
388
  max_dim_cfg = int(os.getenv('MIRAGE_PROC_MAX_DIM', '512') or '512')
389
  if max_dim_cfg < 64:
@@ -398,51 +416,96 @@ class IncomingVideoTrack(MediaStreamTrack):
398
  proc_input = cv2.resize(img, (max(1, scale_w), max(1, scale_h)))
399
  except Exception as e:
400
  logger.debug(f"Video downscale skip: {e}")
401
- # Schedule background processing to avoid blocking recv()
402
- async def _process_async(inp: np.ndarray, expected_size: tuple[int, int], fid: int):
 
 
 
 
 
403
  try:
404
- logger.info(f"Processing video frame {fid}, input shape: {inp.shape}")
405
- out_small = self.pipeline.process_video_frame(inp, fid)
406
- logger.info(f"Pipeline returned frame shape: {out_small.shape if out_small is not None else 'None'}")
407
- if out_small is None:
408
- logger.warning(f"Pipeline returned None for frame {fid}")
409
- return
410
- if (out_small.shape[1], out_small.shape[0]) != expected_size:
411
- out = cv2.resize(out_small, expected_size)
412
- logger.info(f"Resized frame from {out_small.shape[:2]} to {expected_size}")
413
  else:
414
- out = out_small
415
- async with self._lock:
416
- self._last_processed = out
417
- logger.info(f"Stored processed frame {fid}, shape: {out.shape}")
 
 
418
  except Exception as ex:
419
- logger.error(f"Video processing error(bg): {ex}")
420
- finally:
421
- self._processing_task = None
422
-
423
- expected = (w, h)
424
- if self._processing_task is None:
425
- # Only run one processing task at a time; drop older frames
426
- self._processing_task = asyncio.create_task(_process_async(proc_input, expected, self.frame_id))
427
-
428
- # Use last processed if available, else pass-through
429
- async with self._lock:
430
- processed = self._last_processed if self._last_processed is not None else img
431
- mode = 'processed' if self._last_processed is not None else 'passthrough'
432
- logger.info(f"Frame {self.frame_id}: using {mode} frame, shape: {processed.shape}")
433
-
434
- # Rebase timestamps to a clean monotonic sequence to avoid decoder stall if processing lagged
435
- import av as _av
436
- vframe = _av.VideoFrame.from_ndarray(processed, format="bgr24")
437
- # Provide new pts/time_base using VideoStreamTrack helper (borrow from OutboundVideoTrack semantics)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
  try:
439
- pts, time_base = await OutboundVideoTrack().next_timestamp() # ephemeral instance just for sequencing
440
- vframe.pts = pts
441
- vframe.time_base = time_base
 
 
 
 
442
  except Exception:
443
- # Fallback to original timing
444
- vframe.pts = frame.pts
445
- vframe.time_base = frame.time_base
 
 
 
 
 
 
 
 
 
446
  return vframe
447
 
448
 
@@ -872,6 +935,35 @@ async def frame_counter():
872
  except Exception as e:
873
  return {"active": False, "error": str(e)}
874
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
875
  # Optional: connection monitoring endpoint for diagnostics
876
  if add_connection_monitoring is not None:
877
  try:
 
375
  self._last_processed: Optional[np.ndarray] = None
376
  self._processing_task: Optional[asyncio.Task] = None
377
  self._lock = asyncio.Lock()
378
+ # Latency / timing metrics
379
+ self._capture_ts: Optional[float] = None
380
+ self._last_latency_ms: Optional[float] = None
381
+ self._avg_latency_ms: Optional[float] = None
382
+ self._lat_hist: list[float] = []
383
+ self._queue_wait_last_ms: Optional[float] = None
384
+ self._queue_wait_hist: list[float] = []
385
+ self._frames_passthrough = 0
386
+ self._frames_processed = 0
387
+ self._frames_dropped = 0
388
+ self._placeholder_active = True
389
+ self._sync_if_idle = os.getenv('MIRAGE_SYNC_IF_IDLE','1').lower() in ('1','true','yes','on')
390
+ self._pts_origin: Optional[float] = None # monotonic origin
391
+ self._last_sent_pts: Optional[int] = None
392
+ self._time_base = (1, 90000) # 90kHz typical video clock
393
 
394
  async def recv(self): # type: ignore[override]
395
  frame = await self.track.recv()
396
  self.frame_id += 1
397
+ capture_t = time.time()
398
+ if self._pts_origin is None:
399
+ self._pts_origin = capture_t
400
  # Convert to numpy BGR for pipeline
401
  img = frame.to_ndarray(format="bgr24")
402
  h, w, _ = img.shape
403
  proc_input = img
404
+ # Optional downscale (same as prior)
405
  try:
406
  max_dim_cfg = int(os.getenv('MIRAGE_PROC_MAX_DIM', '512') or '512')
407
  if max_dim_cfg < 64:
 
416
  proc_input = cv2.resize(img, (max(1, scale_w), max(1, scale_h)))
417
  except Exception as e:
418
  logger.debug(f"Video downscale skip: {e}")
419
+
420
+ expected_size = (w, h)
421
+ processed: Optional[np.ndarray] = None
422
+
423
+ # Hybrid processing: inline if no background task running OR sync flag set; else schedule
424
+ if self._sync_if_idle and (self._processing_task is None):
425
+ t_q_start = time.time()
426
  try:
427
+ out_small = self.pipeline.process_video_frame(proc_input, self.frame_id)
428
+ if out_small is not None and (out_small.shape[1], out_small.shape[0]) != expected_size:
429
+ processed = cv2.resize(out_small, expected_size)
 
 
 
 
 
 
430
  else:
431
+ processed = out_small if out_small is not None else img
432
+ self._queue_wait_last_ms = (time.time() - t_q_start) * 1000.0 # inclusive (no wait, pure proc)
433
+ self._queue_wait_hist.append(self._queue_wait_last_ms)
434
+ if len(self._queue_wait_hist) > 300:
435
+ self._queue_wait_hist.pop(0)
436
+ self._frames_processed += 1
437
  except Exception as ex:
438
+ logger.debug(f"inline processing error: {ex}")
439
+ processed = img
440
+ else:
441
+ # Background path
442
+ if self._processing_task is None:
443
+ async def _process_async(inp: np.ndarray, expected_size: tuple[int,int], fid: int, enqueue_t: float):
444
+ try:
445
+ out_small = self.pipeline.process_video_frame(inp, fid)
446
+ out = out_small
447
+ if out_small is not None and (out_small.shape[1], out_small.shape[0]) != expected_size:
448
+ out = cv2.resize(out_small, expected_size)
449
+ elif out is None:
450
+ out = inp # fallback
451
+ async with self._lock:
452
+ self._last_processed = out
453
+ q_wait = (time.time() - enqueue_t) * 1000.0
454
+ self._queue_wait_last_ms = q_wait
455
+ self._queue_wait_hist.append(q_wait)
456
+ if len(self._queue_wait_hist) > 300:
457
+ self._queue_wait_hist.pop(0)
458
+ self._frames_processed += 1
459
+ except Exception as ex:
460
+ logger.debug(f"video processing error(bg): {ex}")
461
+ finally:
462
+ self._processing_task = None
463
+ self._processing_task = asyncio.create_task(_process_async(proc_input, expected_size, self.frame_id, time.time()))
464
+ # Use last processed snapshot; count passthrough if not yet available
465
+ async with self._lock:
466
+ if self._last_processed is not None:
467
+ processed = self._last_processed
468
+ else:
469
+ processed = img
470
+ self._frames_passthrough += 1
471
+ # We'll consider this frame 'dropped' re: processing freshness if a task already running
472
+ if self._processing_task is not None:
473
+ self._frames_dropped += 1
474
+
475
+ # Metrics update
476
+ proc_latency_ms = (time.time() - capture_t) * 1000.0
477
+ self._last_latency_ms = proc_latency_ms
478
+ self._lat_hist.append(proc_latency_ms)
479
+ if len(self._lat_hist) > 300:
480
+ self._lat_hist.pop(0)
481
+ self._avg_latency_ms = float(np.mean(self._lat_hist)) if self._lat_hist else None
482
+
483
+ # Placeholder becomes inactive as soon as we emit a frame post-first capture
484
+ if self._placeholder_active:
485
+ self._placeholder_active = False
486
+
487
+ # Timestamp handling: derive pts from capture time relative to origin on a 90kHz clock
488
  try:
489
+ clock_rate = 90000
490
+ rel_sec = capture_t - (self._pts_origin or capture_t)
491
+ pts = int(rel_sec * clock_rate)
492
+ # Guard against monotonic regressions
493
+ if self._last_sent_pts is not None and pts <= self._last_sent_pts:
494
+ pts = self._last_sent_pts + int(clock_rate / 30) # assume ~30fps minimal increment
495
+ self._last_sent_pts = pts
496
  except Exception:
497
+ pts = frame.pts if frame.pts is not None else 0
498
+
499
+ import av as _av
500
+ vframe = _av.VideoFrame.from_ndarray(processed, format="bgr24")
501
+ vframe.pts = pts
502
+ vframe.time_base = _av.time_base.TimeBase(num=1, den=90000) if hasattr(_av, 'time_base') else frame.time_base
503
+ if (self.frame_id % 120) == 0:
504
+ logger.debug(
505
+ f"vid frame={self.frame_id} inline={self._sync_if_idle and self._processing_task is None} "
506
+ f"proc_ms={proc_latency_ms:.1f} avg_ms={self._avg_latency_ms:.1f if self._avg_latency_ms else None} "
507
+ f"queue_wait_last={self._queue_wait_last_ms} passthrough={self._frames_passthrough} dropped={self._frames_dropped}"
508
+ )
509
  return vframe
510
 
511
 
 
935
  except Exception as e:
936
  return {"active": False, "error": str(e)}
937
 
@router.get("/pipeline_stats")
async def pipeline_stats():
    """Return merged swap pipeline stats and live video track latency metrics.

    Response shape:
      {"pipeline": {...}, "video_track": {...}}  on success,
      {"error": "..."}                            on unexpected failure.
    Never raises — diagnostics endpoints should degrade, not 500.
    """
    try:
        pipeline = get_pipeline()
        # Bug fix: the pipeline class (FaceSwapPipeline) exposes `initialized`,
        # not `loaded`; gating only on `loaded` made base_stats always empty.
        # Accept `loaded` if some implementation defines it, else fall back.
        ready = getattr(pipeline, 'loaded', None)
        if ready is None:
            ready = getattr(pipeline, 'initialized', False)
        base_stats = pipeline.get_performance_stats() if ready else {}
        # Attempt to locate the active IncomingVideoTrack via peer senders.
        track_stats = {}
        try:
            st = _peer_state
            if st is not None:
                pc = st.pc
                for sender in pc.getSenders():
                    tr = getattr(sender, 'track', None)
                    if tr and isinstance(tr, MediaStreamTrack) and getattr(tr, 'kind', None) == 'video':
                        # Heuristic: our track carries these private metric attributes;
                        # copy whichever are present, stripping the leading underscore.
                        for attr in [
                            '_last_latency_ms', '_avg_latency_ms', '_queue_wait_last_ms',
                            '_frames_passthrough', '_frames_processed', '_frames_dropped',
                            '_placeholder_active',
                        ]:
                            if hasattr(tr, attr):
                                track_stats[attr.lstrip('_')] = getattr(tr, attr)
                        break  # first video sender only
        except Exception as e:
            # Best-effort: surface the failure inside the payload rather than failing the request.
            track_stats['error'] = f"track_stats: {e}"
        return {"pipeline": base_stats, "video_track": track_stats}
    except Exception as e:
        return {"error": str(e)}
967
  # Optional: connection monitoring endpoint for diagnostics
968
  if add_connection_monitoring is not None:
969
  try: