MogensR committed on
Commit
bd64f57
·
1 Parent(s): 80ac736

agent 1.5

Browse files
Files changed (1) hide show
  1. models/matanyone_loader.py +278 -57
models/matanyone_loader.py CHANGED
@@ -105,17 +105,56 @@ class MatAnyoneSession:
105
  """
106
 
107
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
 
 
 
 
 
 
108
  self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
109
- self.precision = precision
110
  self._core = None
111
  self._api_mode = None # "step", "process_frame", or "process_video"
 
 
 
 
112
  self._lazy_init()
113
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  def _lazy_init(self) -> None:
 
115
  try:
116
  from matanyone.inference.inference_core import InferenceCore # type: ignore
 
 
117
  except Exception as e:
118
- raise MatAnyError(f"MatAnyone import failed: {e}")
 
 
 
 
 
 
 
 
119
 
120
  # Try zero-arg first, then repo-id variant
121
  try:
@@ -203,16 +242,38 @@ def _run_frame(self, frame_bgr: np.ndarray, seed_1hw: Optional[np.ndarray], is_f
203
  raise MatAnyError(f"mask_t must be HW; got {tuple(mask_t.shape)}")
204
 
205
  # --- Process with MatAnyone ---
206
- with torch.no_grad(), self._maybe_amp():
207
- try:
 
208
  if self._api_mode == "step":
209
  alpha = self._core.step(img_t, mask_t) if mask_t is not None else self._core.step(img_t)
210
  elif self._api_mode == "process_frame":
211
  alpha = self._core.process_frame(img_t, mask_t)
212
  else:
213
- raise MatAnyError("Internal: _run_frame used while API mode is 'process_video'.")
214
- except Exception as e:
215
- raise MatAnyError(f"MatAnyone processing failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
  # --- Process output ---
218
  # Convert to numpy and ensure correct shape/range
@@ -306,18 +367,42 @@ def process_stream(
306
  """Process video stream with MatAnyone.
307
 
308
  Args:
309
- video_path: Input video file
310
  seed_mask_path: Optional seed mask image (grayscale, same size as video)
311
  out_dir: Output directory (default: video_path.parent)
312
  progress_cb: Callback for progress updates (signature: (float, str) or (str,))
313
 
314
  Returns:
315
  Tuple of (alpha_path, fg_path) output video paths
 
 
 
 
 
316
  """
 
 
 
 
 
 
 
317
  if out_dir is None:
318
  out_dir = video_path.parent
 
319
  out_dir = Path(out_dir)
320
  out_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  cap = cv2.VideoCapture(str(video_path))
323
  if not cap.isOpened():
@@ -330,32 +415,64 @@ def process_stream(
330
  cap.release()
331
 
332
  log.info(f"[MATANY] Processing {N} frames ({W}x{H} @ {fps:.1f}fps) from {video_path}")
 
333
 
334
  if self._api_mode == "process_video":
335
  # --- PATH-BASED CALL (this wheel expects a video path, not tensors) ---
336
- _emit_progress(progress_cb, 0.05, "MatAnyone (video mode)")
337
-
338
- # Some builds accept (video_path, seed_mask_path), others just (video_path)
339
- try:
340
- res = self._core.process_video(str(video_path),
341
- str(seed_mask_path) if seed_mask_path is not None else None)
342
- except TypeError:
343
- # Fallback: only video path
344
- res = self._core.process_video(str(video_path))
345
-
346
- # Normalize whatever we got back into alpha.mp4 + fg.mp4 in out_dir
347
- alpha_path, fg_path = self._harvest_process_video_output(res, out_dir, base=video_path.stem)
348
- _validate_nonempty(alpha_path)
349
- _validate_nonempty(fg_path)
350
- _emit_progress(progress_cb, 1.0, "MatAnyone complete")
351
- return alpha_path, fg_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  else:
353
  # Frame-by-frame (preferred)
354
  log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")
 
 
355
  cap = cv2.VideoCapture(str(video_path))
356
  alpha_path = out_dir / "alpha.mp4"
357
  fg_path = out_dir / "fg.mp4"
358
 
 
 
359
  alpha_writer = cv2.VideoWriter(
360
  str(alpha_path),
361
  cv2.VideoWriter_fourcc(*'mp4v'),
@@ -370,6 +487,9 @@ def process_stream(
370
  (W, H),
371
  isColor=True
372
  )
 
 
 
373
 
374
  try:
375
  # Load seed mask if provided
@@ -378,40 +498,141 @@ def process_stream(
378
  seed_1hw = _read_mask_hw(seed_mask_path, (H, W))
379
 
380
  idx = 0
381
- while True:
382
- ret, frame = cap.read()
383
- if not ret:
384
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
- if idx % 10 == 0:
387
- _emit_progress(progress_cb, min(0.999, (idx / N) if N > 0 else 0.0),
388
- f"MatAnyone matting… ({idx}/{N})")
 
 
 
389
 
390
- log.debug(f"[MATANY] Processing frame {idx+1}/{N}")
391
- # Only pass seed mask on first frame
392
- current_mask = seed_1hw if idx == 0 else None
393
- alpha_hw = self._run_frame(frame, current_mask, is_first=(idx == 0))
394
-
395
- # compose fg for immediate write
396
- # alpha 0..1 -> 0..255 3-channel grayscale
397
- alpha_u8 = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
398
- alpha_rgb = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
399
- # Blend: fg = alpha*frame + (1-alpha)*black == alpha*frame
400
- fg_bgr = (frame.astype(np.float32) * (alpha_hw[..., None] / 255.0)).astype(np.uint8)
401
-
402
- # Write outputs
403
- alpha_writer.write(alpha_rgb)
404
- fg_writer.write(fg_bgr)
405
- idx += 1
406
-
407
- finally:
408
- cap.release()
409
- alpha_writer.release()
410
- fg_writer.release()
411
- _validate_nonempty(alpha_path)
412
- _validate_nonempty(fg_path)
413
- _emit_progress(progress_cb, 1.0, "MatAnyone complete")
414
- return alpha_path, fg_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
  def _flush_chunk(self, frames_bgr, seed_1hw, alpha_writer, fg_writer):
417
  """Process a chunk of frames with MatAnyone."""
 
105
  """
106
 
107
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
108
+ """Initialize MatAnyoneSession with optional device and precision settings.
109
+
110
+ Args:
111
+ device: Device to run on (e.g., 'cuda', 'cpu', 'cuda:0'). If None, auto-detects CUDA.
112
+ precision: One of 'auto', 'fp32', or 'fp16'. 'auto' uses fp16 if CUDA is available.
113
+ """
114
  self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
115
+ self.precision = precision.lower()
116
  self._core = None
117
  self._api_mode = None # "step", "process_frame", or "process_video"
118
+ self._frame_times = []
119
+ self._start_time = 0.0
120
+ self._gpu_mem_allocated = 0.0
121
+ self._gpu_mem_cached = 0.0
122
  self._lazy_init()
123
+
124
+ # Log initialization
125
+ log.info(f"Initialized MatAnyoneSession on {self.device} with precision {self.precision}")
126
+ if torch.cuda.is_available():
127
+ log.info(f"CUDA device: {torch.cuda.get_device_name(self.device)}")
128
+ self._log_gpu_memory()
129
+
130
+ def _log_gpu_memory(self) -> None:
131
+ """Log current GPU memory usage."""
132
+ if torch.cuda.is_available():
133
+ try:
134
+ allocated = torch.cuda.memory_allocated(self.device) / 1024**2
135
+ cached = torch.cuda.memory_reserved(self.device) / 1024**2
136
+ log.info(f"GPU Memory - Allocated: {allocated:.1f}MB, Cached: {cached:.1f}MB")
137
+ return allocated, cached
138
+ except Exception as e:
139
+ log.warning(f"Failed to get GPU memory info: {e}")
140
+ return 0.0, 0.0
141
+
142
  def _lazy_init(self) -> None:
143
+ """Lazy initialization of the MatAnyone inference core."""
144
  try:
145
  from matanyone.inference.inference_core import InferenceCore # type: ignore
146
+ except ImportError as e:
147
+ raise MatAnyError(f"Failed to import MatAnyone: {e}. Please ensure it's installed correctly.")
148
  except Exception as e:
149
+ raise MatAnyError(f"Unexpected error during MatAnyone import: {e}")
150
+
151
+ # Log GPU info
152
+ if torch.cuda.is_available():
153
+ log.info(f"[GPU] CUDA is available. Device: {torch.cuda.get_device_name(0)}")
154
+ log.info(f"[GPU] Memory allocated: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
155
+ log.info(f"[GPU] Memory cached: {torch.cuda.memory_reserved()/1024**2:.1f}MB")
156
+ else:
157
+ log.warning("[GPU] CUDA is not available. Using CPU (this will be slow!)")
158
 
159
  # Try zero-arg first, then repo-id variant
160
  try:
 
242
  raise MatAnyError(f"mask_t must be HW; got {tuple(mask_t.shape)}")
243
 
244
  # --- Process with MatAnyone ---
245
+ frame_start_time = time.time()
246
+ try:
247
+ with torch.no_grad(), self._maybe_amp():
248
  if self._api_mode == "step":
249
  alpha = self._core.step(img_t, mask_t) if mask_t is not None else self._core.step(img_t)
250
  elif self._api_mode == "process_frame":
251
  alpha = self._core.process_frame(img_t, mask_t)
252
  else:
253
+ raise MatAnyError("Internal error: Invalid API mode")
254
+
255
+ # Log performance metrics
256
+ frame_time = time.time() - frame_start_time
257
+ self._frame_times.append(frame_time)
258
+ if len(self._frame_times) > 10: # Keep last 10 frame times
259
+ self._frame_times.pop(0)
260
+
261
+ # Log GPU memory every 10 frames
262
+ if len(self._frame_times) % 10 == 0:
263
+ self._log_gpu_memory()
264
+
265
+ return alpha
266
+
267
+ except torch.cuda.OutOfMemoryError:
268
+ self._log_gpu_memory()
269
+ raise MatAnyError("CUDA out of memory. Try reducing the input resolution or batch size.")
270
+ except RuntimeError as e:
271
+ if "CUDA" in str(e):
272
+ self._log_gpu_memory()
273
+ raise MatAnyError(f"CUDA error: {e}")
274
+ raise MatAnyError(f"Runtime error: {e}")
275
+ except Exception as e:
276
+ raise MatAnyError(f"Processing failed: {e}")
277
 
278
  # --- Process output ---
279
  # Convert to numpy and ensure correct shape/range
 
367
  """Process video stream with MatAnyone.
368
 
369
  Args:
370
+ video_path: Input video file path (must exist and be readable)
371
  seed_mask_path: Optional seed mask image (grayscale, same size as video)
372
  out_dir: Output directory (default: video_path.parent)
373
  progress_cb: Callback for progress updates (signature: (float, str) or (str,))
374
 
375
  Returns:
376
  Tuple of (alpha_path, fg_path) output video paths
377
+
378
+ Raises:
379
+ MatAnyError: If processing fails for any reason
380
+ FileNotFoundError: If input files are not found
381
+ ValueError: If input parameters are invalid
382
  """
383
+ # Input validation
384
+ if not video_path.exists():
385
+ raise FileNotFoundError(f"Input video not found: {video_path}")
386
+
387
+ if seed_mask_path is not None and not seed_mask_path.exists():
388
+ raise FileNotFoundError(f"Seed mask not found: {seed_mask_path}")
389
+
390
  if out_dir is None:
391
  out_dir = video_path.parent
392
+
393
  out_dir = Path(out_dir)
394
  out_dir.mkdir(parents=True, exist_ok=True)
395
+
396
+ # Initialize progress tracking
397
+ self._frame_times = []
398
+ self._start_time = time.time()
399
+ _emit_progress(progress_cb, 0.0, "Initializing video processing...")
400
+
401
+ # Log GPU status
402
+ if torch.cuda.is_available():
403
+ _emit_progress(progress_cb, 0.01, "GPU detected, initializing CUDA...")
404
+ else:
405
+ _emit_progress(progress_cb, 0.01, "No GPU detected, using CPU (slower)...")
406
 
407
  cap = cv2.VideoCapture(str(video_path))
408
  if not cap.isOpened():
 
415
  cap.release()
416
 
417
  log.info(f"[MATANY] Processing {N} frames ({W}x{H} @ {fps:.1f}fps) from {video_path}")
418
+ _emit_progress(progress_cb, 0.05, f"Processing {N} frames ({W}x{H} @ {fps:.1f}fps)")
419
 
420
  if self._api_mode == "process_video":
421
  # --- PATH-BASED CALL (this wheel expects a video path, not tensors) ---
422
+ _emit_progress(progress_cb, 0.1, "Using MatAnyone video mode (GPU-accelerated)")
423
+
424
+ # Log before starting video processing
425
+ if torch.cuda.is_available():
426
+ mem_alloc, _ = self._log_gpu_memory()
427
+ _emit_progress(progress_cb, 0.12, f"GPU memory before processing: {mem_alloc:.1f}MB")
428
+
429
+ # Some builds accept (video_path, seed_mask_path), others just (video_path)
430
+ try:
431
+ _emit_progress(progress_cb, 0.15, "Starting video processing with mask...")
432
+ res = self._core.process_video(
433
+ str(video_path),
434
+ str(seed_mask_path) if seed_mask_path is not None else None
435
+ )
436
+ except TypeError as e:
437
+ if "takes 2 positional arguments but 3 were given" in str(e):
438
+ _emit_progress(progress_cb, 0.15, "Starting video processing without mask...")
439
+ res = self._core.process_video(str(video_path))
440
+ else:
441
+ raise
442
+
443
+ # Log after processing
444
+ if torch.cuda.is_available():
445
+ _emit_progress(progress_cb, 0.9, f"Processing complete. GPU memory used: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
446
+ else:
447
+ _emit_progress(progress_cb, 0.9, "Processing complete.")
448
+
449
+ # Normalize output files
450
+ _emit_progress(progress_cb, 0.95, "Finalizing output files...")
451
+ alpha_path, fg_path = self._harvest_process_video_output(res, out_dir, base=video_path.stem)
452
+ _validate_nonempty(alpha_path)
453
+ _validate_nonempty(fg_path)
454
+
455
+ _emit_progress(progress_cb, 1.0, "Processing complete!")
456
+ return alpha_path, fg_path
457
+
458
+ except Exception as e:
459
+ error_msg = f"Error during video processing: {str(e)}"
460
+ log.error(error_msg, exc_info=True)
461
+ if torch.cuda.is_available():
462
+ error_msg += f"\nGPU Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB allocated"
463
+ _emit_progress(progress_cb, -1, error_msg)
464
+ raise MatAnyError(error_msg) from e
465
  else:
466
  # Frame-by-frame (preferred)
467
  log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")
468
+ _emit_progress(progress_cb, 0.1, f"Using {self._api_mode} mode (frame-by-frame)")
469
+
470
  cap = cv2.VideoCapture(str(video_path))
471
  alpha_path = out_dir / "alpha.mp4"
472
  fg_path = out_dir / "fg.mp4"
473
 
474
+ # Initialize video writers
475
+ _emit_progress(progress_cb, 0.12, "Initializing video writers...")
476
  alpha_writer = cv2.VideoWriter(
477
  str(alpha_path),
478
  cv2.VideoWriter_fourcc(*'mp4v'),
 
487
  (W, H),
488
  isColor=True
489
  )
490
+
491
+ if not alpha_writer.isOpened() or not fg_writer.isOpened():
492
+ raise MatAnyError("Failed to initialize video writers")
493
 
494
  try:
495
  # Load seed mask if provided
 
498
  seed_1hw = _read_mask_hw(seed_mask_path, (H, W))
499
 
500
  idx = 0
501
+ last_progress_update = 0
502
+ frame_times = []
503
+ start_time = time.time()
504
+
505
+ try:
506
+ while True:
507
+ ret, frame = cap.read()
508
+ if not ret:
509
+ break
510
+
511
+ frame_start_time = time.time()
512
+
513
+ # Update progress more frequently (every 1% or 5 frames, whichever is more frequent)
514
+ current_progress = (idx / N) if N > 0 else 0.0
515
+ if idx % max(5, N//100) == 0 or time.time() - last_progress_update > 2.0:
516
+ # Calculate progress metrics
517
+ elapsed = time.time() - start_time
518
+ if idx > 0 and current_progress > 0:
519
+ # Calculate ETA
520
+ eta_seconds = (elapsed / current_progress) * (1 - current_progress)
521
+ if eta_seconds > 3600:
522
+ eta_str = f"{eta_seconds/3600:.1f} hours"
523
+ elif eta_seconds > 60:
524
+ eta_str = f"{eta_seconds/60:.1f} minutes"
525
+ else:
526
+ eta_str = f"{eta_seconds:.0f} seconds"
527
+
528
+ # Calculate processing speed
529
+ fps = idx / elapsed if elapsed > 0 else 0
530
+
531
+ # Add GPU memory info if available
532
+ gpu_info = ""
533
+ if torch.cuda.is_available():
534
+ mem_alloc = torch.cuda.memory_allocated() / 1024**2
535
+ mem_cached = torch.cuda.memory_reserved() / 1024**2
536
+ gpu_info = f" | GPU: {mem_alloc:.1f}/{mem_cached:.1f}MB"
537
+
538
+ status = (f"Processing frame {idx+1}/{N} (ETA: {eta_str}, "
539
+ f"{fps:.1f} FPS{gpu_info}")
540
+ _emit_progress(progress_cb, min(0.99, current_progress), status)
541
+ last_progress_update = time.time()
542
+
543
+ # Process frame
544
+ log.debug(f"[MATANY] Processing frame {idx+1}/{N}")
545
+ # Only pass seed mask on first frame
546
+ current_mask = seed_1hw if idx == 0 else None
547
+ alpha_hw = self._run_frame(frame, current_mask, is_first=(idx == 0))
548
+
549
+ # Calculate frame processing time
550
+ frame_time = time.time() - frame_start_time
551
+ frame_times.append(frame_time)
552
+ if len(frame_times) > 10: # Keep last 10 frame times for average
553
+ frame_times.pop(0)
554
+
555
+ # Log GPU memory usage occasionally
556
+ if idx % 50 == 0 and torch.cuda.is_available():
557
+ log.info(f"[GPU] Memory allocated: {torch.cuda.memory_allocated()/1024**2:.1f}MB, "
558
+ f"Cached: {torch.cuda.memory_reserved()/1024**2:.1f}MB, "
559
+ f"Avg frame time: {sum(frame_times)/len(frame_times)*1000:.1f}ms")
560
+
561
+ # Compose output frames
562
+ alpha_u8 = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
563
+ alpha_rgb = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
564
+ fg_bgr = (frame.astype(np.float32) * (alpha_hw[..., None] / 255.0)).astype(np.uint8)
565
+
566
+ # Write outputs
567
+ alpha_writer.write(alpha_rgb)
568
+ fg_writer.write(fg_bgr)
569
+ idx += 1
570
+
571
+ except Exception as e:
572
+ # Log detailed error information
573
+ error_msg = f"Error processing frame {idx+1}/{N}: {str(e)}"
574
+ log.error(error_msg, exc_info=True)
575
 
576
+ # Add GPU memory info if available
577
+ if torch.cuda.is_available():
578
+ mem_alloc = torch.cuda.memory_allocated() / 1024**2
579
+ mem_cached = torch.cuda.memory_reserved() / 1024**2
580
+ error_msg += (f"\nGPU Memory - Allocated: {mem_alloc:.1f}MB, "
581
+ f"Cached: {mem_cached:.1f}MB")
582
 
583
+ # Add frame processing stats
584
+ if self._frame_times:
585
+ avg_time = sum(self._frame_times) / len(self._frame_times)
586
+ error_msg += f"\nAvg frame time: {avg_time*1000:.1f}ms"
587
+
588
+ _emit_progress(progress_cb, -1, f"ERROR: {error_msg}")
589
+ raise MatAnyError(error_msg) from e
590
+
591
+ finally:
592
+ # Cleanup resources
593
+ try:
594
+ if 'cap' in locals() and cap.isOpened():
595
+ cap.release()
596
+ if 'alpha_writer' in locals() and alpha_writer is not None:
597
+ if hasattr(alpha_writer, 'isOpened') and alpha_writer.isOpened():
598
+ alpha_writer.release()
599
+ if 'fg_writer' in locals() and fg_writer is not None:
600
+ if hasattr(fg_writer, 'isOpened') and fg_writer.isOpened():
601
+ fg_writer.release()
602
+
603
+ # Log final stats
604
+ total_time = time.time() - start_time
605
+ fps = idx / total_time if total_time > 0 else 0
606
+
607
+ # Log GPU memory info if available
608
+ gpu_info = ""
609
+ if torch.cuda.is_available():
610
+ mem_alloc = torch.cuda.memory_allocated() / 1024**2
611
+ mem_cached = torch.cuda.memory_reserved() / 1024**2
612
+ gpu_info = f"\nGPU Memory - Allocated: {mem_alloc:.1f}MB, Cached: {mem_cached:.1f}MB"
613
+
614
+ log.info(
615
+ f"[MATANY] Processed {idx} frames in {total_time:.1f}s ({fps:.1f} FPS){gpu_info}"
616
+ )
617
+
618
+ # Validate outputs
619
+ _validate_nonempty(alpha_path)
620
+ _validate_nonempty(fg_path)
621
+
622
+ # Final progress update
623
+ _emit_progress(
624
+ progress_cb,
625
+ 1.0,
626
+ f"Complete! Processed {idx} frames at {fps:.1f} FPS{gpu_info}"
627
+ )
628
+
629
+ return alpha_path, fg_path
630
+
631
+ except Exception as e:
632
+ error_msg = f"Error during cleanup: {str(e)}"
633
+ log.error(error_msg, exc_info=True)
634
+ _emit_progress(progress_cb, -1, f"CLEANUP ERROR: {error_msg}")
635
+ raise MatAnyError(error_msg) from e
636
 
637
  def _flush_chunk(self, frames_bgr, seed_1hw, alpha_writer, fg_writer):
638
  """Process a chunk of frames with MatAnyone."""