Spaces:

MogensR
/

VideoBackgroundReplacer2

Paused

App Files Files Community

MogensR committed on Sep 15, 2025

Commit

a9f51ee

1 Parent(s): bd64f57

agent 1.6

Browse files

Files changed (1) hide show

models/matanyone_loader.py +212 -40

models/matanyone_loader.py CHANGED Viewed

@@ -417,51 +417,223 @@ def process_stream(
         log.info(f"[MATANY] Processing {N} frames ({W}x{H} @ {fps:.1f}fps) from {video_path}")
         _emit_progress(progress_cb, 0.05, f"Processing {N} frames ({W}x{H} @ {fps:.1f}fps)")
-        if self._api_mode == "process_video":
-            # --- PATH-BASED CALL (this wheel expects a video path, not tensors) ---
-            _emit_progress(progress_cb, 0.1, "Using MatAnyone video mode (GPU-accelerated)")
-            # Log before starting video processing
-            if torch.cuda.is_available():
-                mem_alloc, _ = self._log_gpu_memory()
-                _emit_progress(progress_cb, 0.12, f"GPU memory before processing: {mem_alloc:.1f}MB")
-                # Some builds accept (video_path, seed_mask_path), others just (video_path)
-                try:
-                    _emit_progress(progress_cb, 0.15, "Starting video processing with mask...")
-                    res = self._core.process_video(
-                        str(video_path),
-                        str(seed_mask_path) if seed_mask_path is not None else None
-                    )
-                except TypeError as e:
-                    if "takes 2 positional arguments but 3 were given" in str(e):
-                        _emit_progress(progress_cb, 0.15, "Starting video processing without mask...")
-                        res = self._core.process_video(str(video_path))
-                    else:
-                        raise
-                # Log after processing
                 if torch.cuda.is_available():
-                    _emit_progress(progress_cb, 0.9, f"Processing complete. GPU memory used: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
-                else:
-                    _emit_progress(progress_cb, 0.9, "Processing complete.")
-                # Normalize output files
-                _emit_progress(progress_cb, 0.95, "Finalizing output files...")
-                alpha_path, fg_path = self._harvest_process_video_output(res, out_dir, base=video_path.stem)
-                _validate_nonempty(alpha_path)
-                _validate_nonempty(fg_path)
-                _emit_progress(progress_cb, 1.0, "Processing complete!")
-                return alpha_path, fg_path
-            except Exception as e:
-                error_msg = f"Error during video processing: {str(e)}"
-                log.error(error_msg, exc_info=True)
-                if torch.cuda.is_available():
-                    error_msg += f"\nGPU Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB allocated"
-                _emit_progress(progress_cb, -1, error_msg)
-                raise MatAnyError(error_msg) from e
         else:
             # Frame-by-frame (preferred)
             log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")

         log.info(f"[MATANY] Processing {N} frames ({W}x{H} @ {fps:.1f}fps) from {video_path}")
         _emit_progress(progress_cb, 0.05, f"Processing {N} frames ({W}x{H} @ {fps:.1f}fps)")
+        try:
+            if self._api_mode == "process_video":
+                # --- PATH-BASED CALL (this wheel expects a video path, not tensors) ---
+                _emit_progress(progress_cb, 0.1, "Using MatAnyone video mode (GPU-accelerated)")
+                # Log before starting video processing
                 if torch.cuda.is_available():
+                    mem_alloc, _ = self._log_gpu_memory()
+                    _emit_progress(progress_cb, 0.12, f"GPU memory before processing: {mem_alloc:.1f}MB")
+                    # Some builds accept (video_path, seed_mask_path), others just (video_path)
+                    try:
+                        _emit_progress(progress_cb, 0.15, "Starting video processing with mask...")
+                        res = self._core.process_video(
+                            str(video_path),
+                            str(seed_mask_path) if seed_mask_path is not None else None
+                        )
+                    except TypeError as e:
+                        if "takes 2 positional arguments but 3 were given" in str(e):
+                            _emit_progress(progress_cb, 0.15, "Starting video processing without mask...")
+                            res = self._core.process_video(str(video_path))
+                        else:
+                            raise
+                    # Log after processing
+                    if torch.cuda.is_available():
+                        _emit_progress(progress_cb, 0.9, f"Processing complete. GPU memory used: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+                    else:
+                        _emit_progress(progress_cb, 0.9, "Processing complete.")
+                    # Normalize output files
+                    _emit_progress(progress_cb, 0.95, "Finalizing output files...")
+                    alpha_path, fg_path = self._harvest_process_video_output(res, out_dir, base=video_path.stem)
+                    _validate_nonempty(alpha_path)
+                    _validate_nonempty(fg_path)
+                    _emit_progress(progress_cb, 1.0, "Processing complete!")
+                    return alpha_path, fg_path
+            else:
+                # Frame-by-frame (preferred)
+                log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")
+                _emit_progress(progress_cb, 0.1, f"Using {self._api_mode} mode (frame-by-frame)")
+                cap = cv2.VideoCapture(str(video_path))
+                alpha_path = out_dir / "alpha.mp4"
+                fg_path = out_dir / "fg.mp4"
+                # Initialize video writers
+                _emit_progress(progress_cb, 0.12, "Initializing video writers...")
+                alpha_writer = cv2.VideoWriter(
+                    str(alpha_path),
+                    cv2.VideoWriter_fourcc(*'mp4v'),
+                    fps,
+                    (W, H),
+                    isColor=False
+                )
+                fg_writer = cv2.VideoWriter(
+                    str(fg_path),
+                    cv2.VideoWriter_fourcc(*'mp4v'),
+                    fps,
+                    (W, H),
+                    isColor=True
+                )
+                if not alpha_writer.isOpened() or not fg_writer.isOpened():
+                    raise MatAnyError("Failed to initialize video writers")
+                try:
+                    # Load seed mask if provided
+                    seed_1hw = None
+                    if seed_mask_path is not None:
+                        seed_1hw = _read_mask_hw(seed_mask_path, (H, W))
+                    idx = 0
+                    last_progress_update = 0
+                    frame_times = []
+                    start_time = time.time()
+                    while True:
+                        ret, frame = cap.read()
+                        if not ret:
+                            break
+                        frame_start_time = time.time()
+                        # Update progress more frequently (every 1% or 5 frames, whichever is more frequent)
+                        current_progress = (idx / N) if N > 0 else 0.0
+                        if idx % max(5, N//100) == 0 or time.time() - last_progress_update > 2.0:
+                            # Calculate progress metrics
+                            elapsed = time.time() - start_time
+                            if idx > 0 and current_progress > 0:
+                                # Calculate ETA
+                                eta_seconds = (elapsed / current_progress) * (1 - current_progress)
+                                if eta_seconds > 3600:
+                                    eta_str = f"{eta_seconds/3600:.1f} hours"
+                                elif eta_seconds > 60:
+                                    eta_str = f"{eta_seconds/60:.1f} minutes"
+                                else:
+                                    eta_str = f"{eta_seconds:.0f} seconds"
+                                # Calculate processing speed
+                                fps = idx / elapsed if elapsed > 0 else 0
+                                # Add GPU memory info if available
+                                gpu_info = ""
+                                if torch.cuda.is_available():
+                                    mem_alloc = torch.cuda.memory_allocated() / 1024**2
+                                    mem_cached = torch.cuda.memory_reserved() / 1024**2
+                                    gpu_info = f" | GPU: {mem_alloc:.1f}/{mem_cached:.1f}MB"
+                                status = (f"Processing frame {idx+1}/{N} (ETA: {eta_str}, "
+                                         f"{fps:.1f} FPS{gpu_info}")
+                                _emit_progress(progress_cb, min(0.99, current_progress), status)
+                                last_progress_update = time.time()
+                        # Process frame
+                        log.debug(f"[MATANY] Processing frame {idx+1}/{N}")
+                        # Only pass seed mask on first frame
+                        current_mask = seed_1hw if idx == 0 else None
+                        alpha_hw = self._run_frame(frame, current_mask, is_first=(idx == 0))
+                        # Calculate frame processing time
+                        frame_time = time.time() - frame_start_time
+                        frame_times.append(frame_time)
+                        if len(frame_times) > 10:  # Keep last 10 frame times for average
+                            frame_times.pop(0)
+                        # Log GPU memory usage occasionally
+                        if idx % 50 == 0 and torch.cuda.is_available():
+                            log.info(f"[GPU] Memory allocated: {torch.cuda.memory_allocated()/1024**2:.1f}MB, "
+                                   f"Cached: {torch.cuda.memory_reserved()/1024**2:.1f}MB, "
+                                   f"Avg frame time: {sum(frame_times)/len(frame_times)*1000:.1f}ms")
+                        # Compose output frames
+                        alpha_u8 = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
+                        alpha_rgb = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
+                        fg_bgr = (frame.astype(np.float32) * (alpha_hw[..., None] / 255.0)).astype(np.uint8)
+                        # Write outputs
+                        alpha_writer.write(alpha_rgb)
+                        fg_writer.write(fg_bgr)
+                        idx += 1
+                except Exception as e:
+                    # Log detailed error information
+                    error_msg = f"Error processing frame {idx+1}/{N}: {str(e)}"
+                    log.error(error_msg, exc_info=True)
+                    # Add GPU memory info if available
+                    if torch.cuda.is_available():
+                        mem_alloc = torch.cuda.memory_allocated() / 1024**2
+                        mem_cached = torch.cuda.memory_reserved() / 1024**2
+                        error_msg += (f"\nGPU Memory - Allocated: {mem_alloc:.1f}MB, "
+                                    f"Cached: {mem_cached:.1f}MB")
+                    # Add frame processing stats
+                    if frame_times:
+                        avg_time = sum(frame_times) / len(frame_times)
+                        error_msg += f"\nAvg frame time: {avg_time*1000:.1f}ms"
+                    _emit_progress(progress_cb, -1, f"ERROR: {error_msg}")
+                    raise MatAnyError(error_msg) from e
+                finally:
+                    # Cleanup resources
+                    try:
+                        if 'cap' in locals() and hasattr(cap, 'isOpened') and cap.isOpened():
+                            cap.release()
+                        if 'alpha_writer' in locals() and alpha_writer is not None:
+                            if hasattr(alpha_writer, 'isOpened') and alpha_writer.isOpened():
+                                alpha_writer.release()
+                        if 'fg_writer' in locals() and fg_writer is not None:
+                            if hasattr(fg_writer, 'isOpened') and fg_writer.isOpened():
+                                fg_writer.release()
+                        # Log final stats
+                        total_time = time.time() - start_time
+                        fps = idx / total_time if total_time > 0 else 0
+                        # Log GPU memory info if available
+                        gpu_info = ""
+                        if torch.cuda.is_available():
+                            mem_alloc = torch.cuda.memory_allocated() / 1024**2
+                            mem_cached = torch.cuda.memory_reserved() / 1024**2
+                            gpu_info = f"\nGPU Memory - Allocated: {mem_alloc:.1f}MB, Cached: {mem_cached:.1f}MB"
+                        log.info(
+                            f"[MATANY] Processed {idx} frames in {total_time:.1f}s ({fps:.1f} FPS){gpu_info}"
+                        )
+                        # Validate outputs
+                        _validate_nonempty(alpha_path)
+                        _validate_nonempty(fg_path)
+                        # Final progress update
+                        _emit_progress(
+                            progress_cb,
+                            1.0,
+                            f"Complete! Processed {idx} frames at {fps:.1f} FPS{gpu_info}"
+                        )
+                        return alpha_path, fg_path
+                    except Exception as e:
+                        error_msg = f"Error during cleanup: {str(e)}"
+                        log.error(error_msg, exc_info=True)
+                        _emit_progress(progress_cb, -1, f"CLEANUP ERROR: {error_msg}")
+                        raise MatAnyError(error_msg) from e
+        except Exception as e:
+            error_msg = f"Error during video processing: {str(e)}"
+            log.error(error_msg, exc_info=True)
+            if torch.cuda.is_available():
+                error_msg += f"\nGPU Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB allocated"
+            _emit_progress(progress_cb, -1, error_msg)
+            raise MatAnyError(error_msg) from e
         else:
             # Frame-by-frame (preferred)
             log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")