Spaces:

MogensR
/

VideoBackgroundReplacer2

Paused

App Files Files Community

MogensR committed on Sep 15, 2025

Commit

b502144

1 Parent(s): a9f51ee

agent 1.7

Browse files

Files changed (1) hide show

models/matanyone_loader.py +60 -16

models/matanyone_loader.py CHANGED Viewed

@@ -807,27 +807,71 @@ def process_stream(
                         raise MatAnyError(error_msg) from e
     def _flush_chunk(self, frames_bgr, seed_1hw, alpha_writer, fg_writer):
-        """Process a chunk of frames with MatAnyone."""
         if not frames_bgr:
             return
-        # Prepare inputs
-        frames_chw = [_to_chw01(f) for f in frames_bgr]                     # list of CHW
-        frames_t   = torch.from_numpy(np.stack(frames_chw)).to(self.device) # T,C,H,W
-        mask_t     = torch.from_numpy(seed_1hw).to(self.device) if seed_1hw is not None else None
-        with torch.no_grad(), self._maybe_amp():
             try:
-                # Try direct tensor processing first (newer versions)
-                if hasattr(self._core, '_process_tensor_video'):
-                    alphas = self._core._process_tensor_video(frames_t, mask_t)
-                else:
-                    # Fall back to file-based processing if tensor API not available
-                    with tempfile.TemporaryDirectory() as tmpdir:
-                        # Save frames to temp directory
-                        frame_paths = []
-                        for i, frame in enumerate(frames_bgr):
-                            path = os.path.join(tmpdir, f'frame_{i:06d}.png')
                             cv2.imwrite(path, frame)
                             frame_paths.append(path)

                         raise MatAnyError(error_msg) from e
     def _flush_chunk(self, frames_bgr, seed_1hw, alpha_writer, fg_writer):
+        """Process a chunk of frames with MatAnyone.
+        Args:
+            frames_bgr: List of frames in BGR format
+            seed_1hw: Seed mask in 1HW format or None
+            alpha_writer: VideoWriter for alpha channel output
+            fg_writer: VideoWriter for foreground output
+        Raises:
+            MatAnyError: If there's an error processing the frames
+        """
         if not frames_bgr:
             return
+        try:
+            # Prepare inputs
+            frames_chw = [_to_chw01(f) for f in frames_bgr]  # list of CHW
+            frames_t = torch.from_numpy(np.stack(frames_chw)).to(self.device)  # T,C,H,W
+            mask_t = None
+            if seed_1hw is not None:
+                mask_t = torch.from_numpy(seed_1hw).to(self.device)
             try:
+                with torch.no_grad(), self._maybe_amp():
+                    # Process frames in batch
+                    if self._api_mode == "process_frame":
+                        alphas = []
+                        for i in range(len(frames_t)):
+                            # Only use mask on first frame if provided
+                            current_mask = mask_t if (i == 0 and mask_t is not None) else None
+                            alpha = self._core.process_frame(frames_t[i].unsqueeze(0), current_mask)
+                            alphas.append(alpha.squeeze(0))
+                        alphas = torch.stack(alphas)
+                    elif hasattr(self._core, '_process_tensor_video'):
+                        # Try direct tensor processing (newer versions)
+                        alphas = self._core._process_tensor_video(frames_t, mask_t)
+                    else:  # step mode
+                        alphas = self._core.step(frames_t, mask_t)
+                    # Convert to numpy and write frames
+                    alphas_np = alphas.cpu().numpy()
+                    for i, alpha in enumerate(alphas_np):
+                        # Convert alpha to uint8 and write
+                        alpha_uint8 = (alpha * 255).astype(np.uint8)
+                        if len(alpha_uint8.shape) == 2:  # If single channel, convert to 3 channels
+                            alpha_uint8 = cv2.cvtColor(alpha_uint8, cv2.COLOR_GRAY2BGR)
+                        alpha_writer.write(alpha_uint8)
+                        # Write foreground (frame * alpha)
+                        fg = frames_bgr[i] * (alpha[..., None] if alpha.ndim == 2 else alpha[0:1].permute(1, 2, 0))
+                        fg = fg.astype(np.uint8)
+                        fg_writer.write(fg)
+            except RuntimeError as e:
+                if "out of memory" in str(e).lower():
+                    # Clear CUDA cache and retry once
+                    torch.cuda.empty_cache()
+                    log.warning("CUDA out of memory, retrying after cache clear")
+                    return self._flush_chunk(frames_bgr, seed_1hw, alpha_writer, fg_writer)
+                raise
+        except Exception as e:
+            error_msg = f"Error processing frame chunk: {str(e)}"
+            log.error(error_msg, exc_info=True)
+            raise MatAnyError(error_msg) from e
                             cv2.imwrite(path, frame)
                             frame_paths.append(path)