MogensR committed on
Commit
63853f9
·
1 Parent(s): 7c9eed8
Files changed (5) hide show
  1. app.py +6 -0
  2. models/__init__.py +8 -2
  3. models/matanyone_loader.py +25 -0
  4. models/sam2_loader.py +32 -1
  5. ui.py +113 -18
app.py CHANGED
@@ -37,6 +37,12 @@
37
  print(f"[PATH-CHECK] sys.path[:8] = {sys.path[:8]}")
38
  print(f"[PATH-CHECK] exists(third_party/sam2) = {sam2_path.exists()}")
39
 
 
 
 
 
 
 
40
  # DEBUG: try importing MatAnyone and show its location
41
  try:
42
  import matanyone # noqa: F401
 
37
  print(f"[PATH-CHECK] sys.path[:8] = {sys.path[:8]}")
38
  print(f"[PATH-CHECK] exists(third_party/sam2) = {sam2_path.exists()}")
39
 
40
+ # Force GPU environment variables before any model imports
41
+ os.environ["SAM2_DEVICE"] = "cuda"
42
+ os.environ["MATANY_DEVICE"] = "cuda"
43
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "0" # Allow async CUDA operations
44
+ print(f"[GPU-FORCE] Set SAM2_DEVICE=cuda, MATANY_DEVICE=cuda", flush=True)
45
+
46
  # DEBUG: try importing MatAnyone and show its location
47
  try:
48
  import matanyone # noqa: F401
models/__init__.py CHANGED
@@ -103,9 +103,15 @@ def _pick_device(env_key: str) -> str:
103
  logger.info(f"CUDA environment variables: {cuda_env_vars}")
104
 
105
  logger.info(f"_pick_device({env_key}): requested='{requested}', has_cuda={has_cuda}")
106
- if requested in {"cuda", "cpu"}:
107
- logger.info(f"Using requested device: {requested}")
 
 
 
 
 
108
  return requested
 
109
  result = "cuda" if has_cuda else "cpu"
110
  logger.info(f"Auto-selected device: {result}")
111
  return result
 
103
  logger.info(f"CUDA environment variables: {cuda_env_vars}")
104
 
105
  logger.info(f"_pick_device({env_key}): requested='{requested}', has_cuda={has_cuda}")
106
+
107
+ # Force CUDA if available (empty string counts as no explicit CPU request)
108
+ if has_cuda and requested not in {"cpu"}:
109
+ logger.info(f"FORCING CUDA device (GPU available, requested='{requested}')")
110
+ return "cuda"
111
+ elif requested in {"cuda", "cpu"}:
112
+ logger.info(f"Using explicitly requested device: {requested}")
113
  return requested
114
+
115
  result = "cuda" if has_cuda else "cpu"
116
  logger.info(f"Auto-selected device: {result}")
117
  return result
models/matanyone_loader.py CHANGED
@@ -71,7 +71,24 @@ def load(self) -> bool:
71
  # βœ… top-level wrapper (accepts model/repo id string)
72
  from matanyone import InferenceCore
73
  logger.info("[MatA] init: repo_id=%s device=%s", self.repo_id, self.device)
 
 
 
 
 
 
74
  self.core = InferenceCore(self.repo_id)
 
 
 
 
 
 
 
 
 
 
 
75
  self.loaded = True
76
  logger.info("[MatA] init OK (%.2fs)", time.time() - t0)
77
  return True
@@ -85,8 +102,16 @@ def load(self) -> bool:
85
  def step(self, image: np.ndarray | torch.Tensor, seed_mask: np.ndarray | torch.Tensor) -> np.ndarray:
86
  if not self.loaded or self.core is None:
87
  raise RuntimeError("MatAnyone not loaded")
 
 
 
 
 
88
  img = _to_chw_float01(image).to(self.device, non_blocking=True)
89
  msk = _to_1hw_float01(seed_mask).to(self.device, non_blocking=True)
 
 
 
90
  out = self.core.step(img, msk)
91
  alpha = out[0] if isinstance(out, (tuple, list)) else out
92
  if not isinstance(alpha, torch.Tensor):
 
71
  # βœ… top-level wrapper (accepts model/repo id string)
72
  from matanyone import InferenceCore
73
  logger.info("[MatA] init: repo_id=%s device=%s", self.repo_id, self.device)
74
+
75
+ # Force GPU device if CUDA available
76
+ if torch.cuda.is_available() and self.device != "cpu":
77
+ self.device = "cuda"
78
+ logger.info("[MatA] FORCING CUDA device for GPU acceleration")
79
+
80
  self.core = InferenceCore(self.repo_id)
81
+
82
+ # Verify MatAnyone is using GPU if available
83
+ if hasattr(self.core, 'device'):
84
+ actual_device = getattr(self.core, 'device', 'unknown')
85
+ logger.info(f"[MatA] device verification: expected={self.device}, actual={actual_device}")
86
+
87
+ # Try to move core to device if it has a 'to' method
88
+ if hasattr(self.core, 'to'):
89
+ self.core = self.core.to(self.device)
90
+ logger.info(f"[MatA] moved core to device: {self.device}")
91
+
92
  self.loaded = True
93
  logger.info("[MatA] init OK (%.2fs)", time.time() - t0)
94
  return True
 
102
  def step(self, image: np.ndarray | torch.Tensor, seed_mask: np.ndarray | torch.Tensor) -> np.ndarray:
103
  if not self.loaded or self.core is None:
104
  raise RuntimeError("MatAnyone not loaded")
105
+
106
+ # Force GPU device for tensors
107
+ if torch.cuda.is_available():
108
+ self.device = "cuda"
109
+
110
  img = _to_chw_float01(image).to(self.device, non_blocking=True)
111
  msk = _to_1hw_float01(seed_mask).to(self.device, non_blocking=True)
112
+
113
+ # Verify tensors are on GPU
114
+ logger.info(f"[MatA] step: img device={img.device}, mask device={msk.device}, target device={self.device}")
115
  out = self.core.step(img, msk)
116
  alpha = out[0] if isinstance(out, (tuple, list)) else out
117
  if not isinstance(alpha, torch.Tensor):
models/sam2_loader.py CHANGED
@@ -56,7 +56,21 @@ def load(self, variant: str = DEFAULT_VARIANT, model_id: str = DEFAULT_MODEL_ID)
56
  log.info("Calling build_sam2()...")
57
  model = build_sam2(config_file=full_cfg_path, ckpt_path=ckpt, device=str(self.device))
58
  log.info("build_sam2() completed successfully")
 
 
 
59
  model.eval()
 
 
 
 
 
 
 
 
 
 
 
60
  self.model = model
61
 
62
  try:
@@ -75,13 +89,30 @@ def first_frame_mask(self, image_rgb01):
75
  Returns an initial binary-ish mask for the foreground subject from first frame.
76
  You can refine prompts here (points/boxes) if you add UI hooks later.
77
  """
 
 
 
 
78
  if hasattr(self.predictor, "set_image"):
79
- self.predictor.set_image((image_rgb01*255).astype("uint8"))
 
 
 
 
 
 
80
  # simple auto-box prompt (tight box)
81
  h, w = image_rgb01.shape[:2]
82
  box = np.array([1, 1, w-2, h-2])
83
  masks, _, _ = self.predictor.predict(box=box, multimask_output=False)
84
  mask = masks[0] # HxW bool/float
 
 
 
 
 
 
 
85
  else:
86
  # video predictor path: run_single_frame if available
87
  mask = (image_rgb01[...,0] > -1) # dummy, should not happen
 
56
  log.info("Calling build_sam2()...")
57
  model = build_sam2(config_file=full_cfg_path, ckpt_path=ckpt, device=str(self.device))
58
  log.info("build_sam2() completed successfully")
59
+
60
+ # Explicitly move model to device and verify
61
+ model = model.to(self.device)
62
  model.eval()
63
+
64
+ # Verify model is on correct device
65
+ if hasattr(model, 'parameters'):
66
+ first_param = next(model.parameters(), None)
67
+ if first_param is not None:
68
+ actual_device = first_param.device
69
+ log.info(f"SAM2 model device verification: expected={self.device}, actual={actual_device}")
70
+ if str(actual_device) != str(self.device):
71
+ log.warning(f"SAM2 model device mismatch! Moving to {self.device}")
72
+ model = model.to(self.device)
73
+
74
  self.model = model
75
 
76
  try:
 
89
  Returns an initial binary-ish mask for the foreground subject from first frame.
90
  You can refine prompts here (points/boxes) if you add UI hooks later.
91
  """
92
+ # Ensure input tensor is on correct device
93
+ if isinstance(image_rgb01, torch.Tensor):
94
+ image_rgb01 = image_rgb01.to(self.device, non_blocking=True)
95
+
96
  if hasattr(self.predictor, "set_image"):
97
+ # Convert to numpy for predictor if needed
98
+ if isinstance(image_rgb01, torch.Tensor):
99
+ image_np = (image_rgb01.cpu().numpy() * 255).astype("uint8")
100
+ else:
101
+ image_np = (image_rgb01 * 255).astype("uint8")
102
+
103
+ self.predictor.set_image(image_np)
104
  # simple auto-box prompt (tight box)
105
  h, w = image_rgb01.shape[:2]
106
  box = np.array([1, 1, w-2, h-2])
107
  masks, _, _ = self.predictor.predict(box=box, multimask_output=False)
108
  mask = masks[0] # HxW bool/float
109
+
110
+ # Keep model on GPU - verify device after prediction
111
+ if hasattr(self.model, 'parameters'):
112
+ first_param = next(self.model.parameters(), None)
113
+ if first_param is not None and str(first_param.device) != str(self.device):
114
+ log.warning(f"SAM2 model moved off GPU during prediction! Moving back to {self.device}")
115
+ self.model = self.model.to(self.device)
116
  else:
117
  # video predictor path: run_single_frame if available
118
  mask = (image_rgb01[...,0] > -1) # dummy, should not happen
ui.py CHANGED
@@ -224,19 +224,39 @@ def process_video_with_background_stoppable(
224
  bg_img.save(tmp_bg.name, format="PNG")
225
  bg_path = tmp_bg.name
226
 
227
- # Run pipeline with immediate diagnostic logging
228
- yield gr.update(visible=False), gr.update(visible=True), None, "πŸ”„ Initializing pipeline..."
229
  logger.info(f"=== PIPELINE START ===")
230
 
231
- # GPU diagnostics first
232
  try:
233
  import torch
234
  logger.info(f"βœ… Torch version: {torch.__version__}")
235
  logger.info(f"βœ… CUDA available: {torch.cuda.is_available()}")
 
236
  if torch.cuda.is_available():
237
- logger.info(f"βœ… CUDA device count: {torch.cuda.device_count()}")
238
- logger.info(f"βœ… Current device: {torch.cuda.current_device()}")
239
- logger.info(f"βœ… Device name: {torch.cuda.get_device_name()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  else:
241
  logger.error(f"❌ CUDA NOT AVAILABLE - GPU processing will fail")
242
  yield gr.update(visible=True), gr.update(visible=False), None, "❌ GPU not available - processing will fail"
@@ -246,12 +266,15 @@ def process_video_with_background_stoppable(
246
  yield gr.update(visible=True), gr.update(visible=False), None, f"GPU check error: {e}"
247
  return
248
 
 
249
  logger.info(f"About to import pipeline module...")
250
 
251
  try:
252
  pipe = importlib.import_module("pipeline")
253
  logger.info(f"βœ… Pipeline module imported successfully")
254
- yield gr.update(visible=False), gr.update(visible=True), None, "πŸ“Ή Starting video processing..."
 
 
255
  except Exception as e:
256
  logger.error(f"❌ Pipeline import failed: {e}")
257
  yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline import error: {e}"
@@ -260,14 +283,44 @@ def process_video_with_background_stoppable(
260
  logger.info(f"Calling pipe.process with video_path={video_path}, bg_path={bg_path}")
261
  logger.info(f"=== CALLING PIPELINE.PROCESS ===")
262
 
 
 
 
 
263
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  out_path, diag = pipe.process(
265
  video_path=video_path,
266
  bg_image_path=bg_path,
267
  point_x=None,
268
  point_y=None,
269
  auto_box=True,
270
- work_dir=None
 
271
  )
272
  logger.info(f"=== PIPELINE.PROCESS RETURNED ===")
273
  logger.info(f"Pipeline completed: out_path={out_path}, diag={diag}")
@@ -275,25 +328,67 @@ def process_video_with_background_stoppable(
275
  logger.error(f"❌ Pipeline.process failed: {e}")
276
  import traceback
277
  logger.error(f"Full traceback: {traceback.format_exc()}")
278
- yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline processing error: {e}"
 
 
279
  return
280
 
281
  if out_path:
282
- # Show final processing stats in success message
283
  fps = diag.get('fps', 'unknown')
284
  resolution = diag.get('resolution', 'unknown')
285
  sam2_ok = diag.get('sam2_ok', False)
286
  matany_ok = diag.get('matany_ok', False)
287
  processing_time = diag.get('total_time_sec', 0)
 
 
288
 
289
- status_msg = f"βœ… Processing complete! "
290
- if fps != 'unknown' and resolution != 'unknown':
291
- status_msg += f"Video: {resolution} @ {fps}fps, "
292
- status_msg += f"SAM2: {'βœ“' if sam2_ok else 'βœ—'}, MatAnyone: {'βœ“' if matany_ok else 'βœ—'}"
293
- if processing_time > 0:
294
- status_msg += f", Time: {processing_time:.1f}s"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  else:
296
- status_msg = f"❌ Processing failed: {diag.get('error','unknown error')}"
 
 
 
 
 
297
 
298
  if STOP.stop:
299
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
@@ -376,7 +471,7 @@ def create_interface():
376
  with gr.Column(scale=1):
377
  gr.Markdown("## Results")
378
  result_video = gr.Video(label="Processed Video", height=400)
379
- status_output = gr.Textbox(label="Processing Status", lines=5, max_lines=10, elem_classes=["status-box"])
380
  gr.Markdown("""
381
  ### Pipeline
382
  1. SAM2 Segmentation → mask
 
224
  bg_img.save(tmp_bg.name, format="PNG")
225
  bg_path = tmp_bg.name
226
 
227
+ # Run pipeline with enhanced real-time status updates
228
+ yield gr.update(visible=False), gr.update(visible=True), None, "πŸ”„ Initializing pipeline...\n⚑ Checking GPU acceleration..."
229
  logger.info(f"=== PIPELINE START ===")
230
 
231
+ # Enhanced GPU diagnostics with detailed status
232
  try:
233
  import torch
234
  logger.info(f"βœ… Torch version: {torch.__version__}")
235
  logger.info(f"βœ… CUDA available: {torch.cuda.is_available()}")
236
+
237
  if torch.cuda.is_available():
238
+ device_count = torch.cuda.device_count()
239
+ current_device = torch.cuda.current_device()
240
+ device_name = torch.cuda.get_device_name()
241
+ device_capability = torch.cuda.get_device_capability()
242
+
243
+ # Get GPU memory info
244
+ memory_allocated = torch.cuda.memory_allocated() / (1024**3) # GB
245
+ memory_reserved = torch.cuda.memory_reserved() / (1024**3) # GB
246
+ memory_total = torch.cuda.get_device_properties(current_device).total_memory / (1024**3) # GB
247
+
248
+ gpu_status = f"""βœ… GPU Acceleration Active
249
+ πŸ–₯️ Device: {device_name} (Compute {device_capability[0]}.{device_capability[1]})
250
+ πŸ’Ύ Memory: {memory_allocated:.1f}GB allocated / {memory_total:.1f}GB total
251
+ πŸ”§ CUDA {torch.version.cuda} | PyTorch {torch.__version__}
252
+ πŸ“Š Ready for SAM2 + MatAnyone processing..."""
253
+
254
+ logger.info(f"βœ… CUDA device count: {device_count}")
255
+ logger.info(f"βœ… Current device: {current_device}")
256
+ logger.info(f"βœ… Device name: {device_name}")
257
+ logger.info(f"βœ… GPU memory: {memory_allocated:.1f}GB/{memory_total:.1f}GB")
258
+
259
+ yield gr.update(visible=False), gr.update(visible=True), None, gpu_status
260
  else:
261
  logger.error(f"❌ CUDA NOT AVAILABLE - GPU processing will fail")
262
  yield gr.update(visible=True), gr.update(visible=False), None, "❌ GPU not available - processing will fail"
 
266
  yield gr.update(visible=True), gr.update(visible=False), None, f"GPU check error: {e}"
267
  return
268
 
269
+ yield gr.update(visible=False), gr.update(visible=True), None, gpu_status + "\n\nπŸ”„ Loading pipeline modules..."
270
  logger.info(f"About to import pipeline module...")
271
 
272
  try:
273
  pipe = importlib.import_module("pipeline")
274
  logger.info(f"βœ… Pipeline module imported successfully")
275
+
276
+ pipeline_status = gpu_status + "\n\nβœ… Pipeline modules loaded\nπŸ“Ή Initializing video processing pipeline..."
277
+ yield gr.update(visible=False), gr.update(visible=True), None, pipeline_status
278
  except Exception as e:
279
  logger.error(f"❌ Pipeline import failed: {e}")
280
  yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline import error: {e}"
 
283
  logger.info(f"Calling pipe.process with video_path={video_path}, bg_path={bg_path}")
284
  logger.info(f"=== CALLING PIPELINE.PROCESS ===")
285
 
286
+ # Enhanced status during processing
287
+ processing_status = gpu_status + "\n\nπŸš€ PROCESSING STARTED\n⏱️ Stage 1: Video analysis & SAM2 segmentation..."
288
+ yield gr.update(visible=False), gr.update(visible=True), None, processing_status
289
+
290
  try:
291
+ # Create a progress callback for real-time updates
292
+ def progress_callback(stage, message, progress_pct=None):
293
+ nonlocal processing_status
294
+ import time
295
+ timestamp = time.strftime("%H:%M:%S")
296
+
297
+ if stage == "sam2_loading":
298
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\n⏱️ [{timestamp}] Stage 1: Loading SAM2 model..."
299
+ elif stage == "sam2_processing":
300
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\n⏱️ [{timestamp}] Stage 1: SAM2 segmentation in progress..."
301
+ elif stage == "matanyone_loading":
302
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\nβœ… Stage 1: SAM2 complete\n⏱️ [{timestamp}] Stage 2: Loading MatAnyone model..."
303
+ elif stage == "matanyone_processing":
304
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\nβœ… Stage 1: SAM2 complete\n⏱️ [{timestamp}] Stage 2: MatAnyone video matting..."
305
+ elif stage == "compositing":
306
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\nβœ… Stage 1: SAM2 complete\nβœ… Stage 2: MatAnyone complete\n⏱️ [{timestamp}] Stage 3: Final compositing..."
307
+
308
+ if progress_pct:
309
+ processing_status += f" ({progress_pct}%)"
310
+ if message:
311
+ processing_status += f"\nπŸ’¬ {message}"
312
+
313
+ # Note: We can't yield from callback, but we log for debugging
314
+ logger.info(f"Progress: {stage} - {message}")
315
+
316
  out_path, diag = pipe.process(
317
  video_path=video_path,
318
  bg_image_path=bg_path,
319
  point_x=None,
320
  point_y=None,
321
  auto_box=True,
322
+ work_dir=None,
323
+ progress_callback=progress_callback # Pass callback if supported
324
  )
325
  logger.info(f"=== PIPELINE.PROCESS RETURNED ===")
326
  logger.info(f"Pipeline completed: out_path={out_path}, diag={diag}")
 
328
  logger.error(f"❌ Pipeline.process failed: {e}")
329
  import traceback
330
  logger.error(f"Full traceback: {traceback.format_exc()}")
331
+
332
+ error_status = gpu_status + f"\n\n❌ PROCESSING FAILED\n🚨 Error: {str(e)[:200]}..."
333
+ yield gr.update(visible=True), gr.update(visible=False), None, error_status
334
  return
335
 
336
  if out_path:
337
+ # Enhanced final processing stats with detailed breakdown
338
  fps = diag.get('fps', 'unknown')
339
  resolution = diag.get('resolution', 'unknown')
340
  sam2_ok = diag.get('sam2_ok', False)
341
  matany_ok = diag.get('matany_ok', False)
342
  processing_time = diag.get('total_time_sec', 0)
343
+ sam2_time = diag.get('sam2_time_sec', 0)
344
+ matany_time = diag.get('matany_time_sec', 0)
345
 
346
+ # Get final GPU memory usage and verify GPU acceleration was used
347
+ try:
348
+ import torch
349
+ if torch.cuda.is_available():
350
+ final_memory = torch.cuda.memory_allocated() / (1024**3)
351
+ peak_memory = torch.cuda.max_memory_allocated() / (1024**3)
352
+
353
+ # Log GPU utilization to verify models used GPU
354
+ logger.info(f"GPU USAGE VERIFICATION:")
355
+ logger.info(f" Final memory allocated: {final_memory:.2f}GB")
356
+ logger.info(f" Peak memory used: {peak_memory:.2f}GB")
357
+
358
+ if peak_memory < 0.1: # Less than 100MB indicates CPU usage
359
+ logger.warning(f"⚠️ LOW GPU USAGE! Peak memory {peak_memory:.2f}GB suggests CPU fallback")
360
+ else:
361
+ logger.info(f"βœ… GPU ACCELERATION CONFIRMED - Peak usage {peak_memory:.2f}GB")
362
+
363
+ torch.cuda.reset_peak_memory_stats() # Reset for next run
364
+ else:
365
+ final_memory = peak_memory = 0
366
+ logger.warning("❌ CUDA not available - models used CPU")
367
+ except Exception as e:
368
+ logger.error(f"GPU memory check failed: {e}")
369
+ final_memory = peak_memory = 0
370
+
371
+ status_msg = gpu_status + f"""
372
+
373
+ πŸŽ‰ PROCESSING COMPLETE!
374
+ βœ… Stage 1: SAM2 segmentation {'βœ“' if sam2_ok else 'βœ—'} ({sam2_time:.1f}s)
375
+ βœ… Stage 2: MatAnyone matting {'βœ“' if matany_ok else 'βœ—'} ({matany_time:.1f}s)
376
+ βœ… Stage 3: Final compositing complete
377
+
378
+ πŸ“Š RESULTS:
379
+ 🎬 Video: {resolution} @ {fps}fps
380
+ ⏱️ Total time: {processing_time:.1f}s
381
+ πŸ’Ύ Peak GPU memory: {peak_memory:.1f}GB
382
+ πŸš€ Pipeline: SAM2 + MatAnyone + GPU acceleration
383
+
384
+ Ready for download! 🎯"""
385
  else:
386
+ error_details = diag.get('error', 'unknown error')
387
+ status_msg = gpu_status + f"""
388
+
389
+ ❌ PROCESSING FAILED
390
+ 🚨 Error: {error_details}
391
+ πŸ’‘ Check logs for detailed troubleshooting info"""
392
 
393
  if STOP.stop:
394
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
 
471
  with gr.Column(scale=1):
472
  gr.Markdown("## Results")
473
  result_video = gr.Video(label="Processed Video", height=400)
474
+ status_output = gr.Textbox(label="Processing Status", lines=8, max_lines=15, elem_classes=["status-box"])
475
  gr.Markdown("""
476
  ### Pipeline
477
  1. SAM2 Segmentation β†’ mask