Spaces:

MogensR
/

VideoBackgroundReplacer2

Paused

App Files Files Community

MogensR commited on Oct 2, 2025

Commit

3d9fd7c

verified ·

1 Parent(s): 6b75cf3

Update pipeline/video_pipeline.py

Browse files

Files changed (1) hide show

pipeline/video_pipeline.py +187 -14

pipeline/video_pipeline.py CHANGED Viewed

@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """
-Video Processing Pipeline
 Two-stage processing: SAM2+MatAnyone → Transparent → Composite
-Includes temporal smoothing to eliminate jitter/shaking
 """
 import os
@@ -11,6 +11,8 @@
 import shutil
 import gc
 import logging
 from pathlib import Path
 import cv2
 import numpy as np
@@ -28,13 +30,141 @@
 logger = logging.getLogger(__name__)
 # Persistent temp dir
 TMP_DIR = Path("tmp")
 TMP_DIR.mkdir(parents=True, exist_ok=True)
-# ============================================================================
 # SAM2 Mask Generation
-# ============================================================================
 def generate_mask_from_video_first_frame(video_path, sam2_predictor):
     """
@@ -62,7 +192,7 @@ def generate_mask_from_video_first_frame(video_path, sam2_predictor):
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            # Use SAM2 to generate mask
             with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
                 sam2_predictor.set_image(frame_rgb)
@@ -85,9 +215,9 @@ def generate_mask_from_video_first_frame(video_path, sam2_predictor):
         logger.error(f"Failed to generate mask: {e}", exc_info=True)
         return None
-# ============================================================================
 # TEMPORAL SMOOTHING - Fixes the shaking issue
-# ============================================================================
 def smooth_alpha_video(alpha_video_path, output_path, window_size=5):
     """
@@ -156,9 +286,9 @@ def smooth_alpha_video(alpha_video_path, output_path, window_size=5):
         # Return original path if smoothing fails
         return alpha_video_path
-# ============================================================================
 # Transparent Video Creation
-# ============================================================================
 def create_transparent_mov(foreground_path, alpha_path, temp_dir):
     """
@@ -217,9 +347,9 @@ def create_transparent_mov(foreground_path, alpha_path, temp_dir):
         logger.error(f"Failed to create transparent MOV: {e}")
         return None
-# ============================================================================
-# STAGE 1: Create Transparent Video (with smoothing fix)
-# ============================================================================
 def stage1_create_transparent_video(input_file):
     """
@@ -230,9 +360,27 @@ def stage1_create_transparent_video(input_file):
     2. Process video with MatAnyone (temporal propagation)
     3. Apply temporal smoothing to alpha channel (FIXES SHAKING)
     4. Create transparent .mov file
     """
     logger.info("Starting Stage 1: Create transparent video")
     # Check memory
     memory_info = get_memory_usage()
     if memory_info.get('gpu_free', 0) < 2.0:
@@ -263,8 +411,17 @@ def update_progress(progress, message):
         if sam2_predictor is None:
             st.error("Failed to load SAM2 model")
             return None
         update_progress(0.1, "Loading MatAnyone model...")
         matanyone_result = load_matanyone_processor()
@@ -277,8 +434,17 @@ def update_progress(progress, message):
         if matanyone_processor is None:
             st.error("Failed to load MatAnyone model")
             return None
         # Process video
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_dir = Path(temp_dir)
@@ -296,6 +462,7 @@ def update_progress(progress, message):
             if mask is None:
                 st.error("Failed to generate mask")
                 return None
             mask_path = str(temp_dir / "mask.png")
@@ -336,14 +503,18 @@ def update_progress(progress, message):
                     update_progress(1.0, "Transparent video created successfully")
                     time.sleep(0.5)
                     return str(persist_path)
                 else:
                     st.error("Failed to create transparent video")
                     return None
             except Exception as e:
                 logger.error(f"MatAnyone processing failed: {e}", exc_info=True)
                 st.error(f"MatAnyone processing failed: {e}")
                 return None
     except Exception as e:
@@ -358,17 +529,19 @@ def update_progress(progress, message):
         except:
             pass
         return None
     finally:
         logger.info("Stage 1 cleanup...")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         gc.collect()
-# ============================================================================
 # STAGE 2: Composite with Background
-# ============================================================================
 def stage2_composite_background(transparent_video_path, background, bg_type):
     """

 #!/usr/bin/env python3
 """
+Video Processing Pipeline - T4 Optimized
 Two-stage processing: SAM2+MatAnyone → Transparent → Composite
+Includes temporal smoothing + T4 memory optimizations
 """
 import os
 import shutil
 import gc
 import logging
+import subprocess
+import threading
 from pathlib import Path
 import cv2
 import numpy as np
 logger = logging.getLogger(__name__)
+# ==================================================================================
+# T4 OPTIMIZATIONS - Environment Setup
+# ==================================================================================
+def setup_t4_environment():
+    """Configure environment for Tesla T4 GPU"""
+    os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF",
+                          "expandable_segments:True,max_split_size_mb:256,garbage_collection_threshold:0.7")
+    os.environ.setdefault("OMP_NUM_THREADS", "1")
+    os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
+    os.environ.setdefault("MKL_NUM_THREADS", "1")
+    torch.set_grad_enabled(False)
+    try:
+        torch.backends.cudnn.benchmark = True
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+        torch.set_float32_matmul_precision("high")
+    except Exception:
+        pass
+    if torch.cuda.is_available():
+        try:
+            frac = float(os.getenv("CUDA_MEMORY_FRACTION", "0.88"))
+            torch.cuda.set_per_process_memory_fraction(frac)
+            logger.info(f"CUDA memory fraction = {frac:.2f}")
+        except Exception as e:
+            logger.warning(f"Could not set CUDA memory fraction: {e}")
+# Initialize T4 optimizations at module load
+setup_t4_environment()
+# ==================================================================================
+# HEARTBEAT MONITOR - Prevents HuggingFace Space Timeout
+# ==================================================================================
+def heartbeat_monitor(running_flag: dict, interval: float = 8.0):
+    """Periodic heartbeat to prevent Space watchdog from killing process"""
+    while running_flag.get("running", False):
+        print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
+        time.sleep(interval)
+# ==================================================================================
+# VRAM ADAPTIVE CONTROLLER - Dynamic Memory Management
+# ==================================================================================
+class VRAMAdaptiveController:
+    """Adjusts memory usage based on available VRAM"""
+    def __init__(self):
+        self.memory_window = int(os.getenv("SAM2_WINDOW", "96"))
+        self.cleanup_every = 20
+    def adapt(self):
+        """Adjust parameters based on current VRAM availability"""
+        if not torch.cuda.is_available():
+            return
+        free, total = torch.cuda.mem_get_info()
+        free_gb = free / (1024 ** 3)
+        # Tighten if low on memory
+        if free_gb < 1.6:
+            self.memory_window = max(48, self.memory_window - 8)
+            self.cleanup_every = max(12, self.cleanup_every - 2)
+            logger.warning(f"Low VRAM ({free_gb:.2f}GB) → window={self.memory_window}")
+        # Relax if plenty of memory
+        elif free_gb > 3.0:
+            self.memory_window = min(128, self.memory_window + 4)
+            self.cleanup_every = min(40, self.cleanup_every + 2)
+    def should_cleanup(self, frame_count: int) -> bool:
+        """Check if it's time for memory cleanup"""
+        return frame_count % self.cleanup_every == 0
+# ==================================================================================
+# MEMORY PRUNING - SAM2 State Management
+# ==================================================================================
+def prune_sam2_state(predictor, state, keep: int):
+    """Prune SAM2 temporal cache to bounded window"""
+    try:
+        if hasattr(predictor, "prune_state"):
+            predictor.prune_state(state, keep=keep)
+        elif hasattr(state, "prune") and callable(getattr(state, "prune")):
+            state.prune(keep=keep)
+    except Exception as e:
+        logger.debug(f"SAM2 prune warning: {e}")
+# ==================================================================================
+# FP16 OPTIMIZATION - Model Loading
+# ==================================================================================
+def optimize_model_for_t4(model, device):
+    """Apply FP16 and channels_last optimizations for T4"""
+    try:
+        if device.type == "cuda":
+            model = model.half().to(device)
+            model = model.to(memory_format=torch.channels_last)
+            logger.info("Applied FP16 + channels_last optimization")
+        return model
+    except Exception as e:
+        logger.warning(f"FP16 optimization warning: {e}")
+        return model
+# ==================================================================================
+# AUDIO MUXING - Safer FFmpeg Audio Restoration
+# ==================================================================================
+def mux_audio(video_no_audio: str, source_with_audio: str, output: str) -> bool:
+    """Restore audio from original video using FFmpeg"""
+    cmd = [
+        "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
+        "-i", video_no_audio,
+        "-i", source_with_audio,
+        "-map", "0:v:0", "-map", "1:a:0?",
+        "-c:v", "copy", "-c:a", "aac", "-shortest",
+        output
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=180)
+        if result.returncode != 0:
+            logger.warning(f"Audio mux failed: {result.stderr.strip()}")
+            return False
+        return True
+    except Exception as e:
+        logger.warning(f"Audio mux error: {e}")
+        return False
 # Persistent temp dir
 TMP_DIR = Path("tmp")
 TMP_DIR.mkdir(parents=True, exist_ok=True)
+# ==================================================================================
 # SAM2 Mask Generation
+# ==================================================================================
 def generate_mask_from_video_first_frame(video_path, sam2_predictor):
     """
             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            # Use SAM2 to generate mask with FP16 optimization
             with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
                 sam2_predictor.set_image(frame_rgb)
         logger.error(f"Failed to generate mask: {e}", exc_info=True)
         return None
+# ==================================================================================
 # TEMPORAL SMOOTHING - Fixes the shaking issue
+# ==================================================================================
 def smooth_alpha_video(alpha_video_path, output_path, window_size=5):
     """
         # Return original path if smoothing fails
         return alpha_video_path
+# ==================================================================================
 # Transparent Video Creation
+# ==================================================================================
 def create_transparent_mov(foreground_path, alpha_path, temp_dir):
     """
         logger.error(f"Failed to create transparent MOV: {e}")
         return None
+# ==================================================================================
+# STAGE 1: Create Transparent Video (T4 Optimized)
+# ==================================================================================
 def stage1_create_transparent_video(input_file):
     """
     2. Process video with MatAnyone (temporal propagation)
     3. Apply temporal smoothing to alpha channel (FIXES SHAKING)
     4. Create transparent .mov file
+    T4 Optimizations:
+    - Heartbeat monitor prevents timeout
+    - VRAM adaptive controller manages memory
+    - FP16 optimization for models
+    - Memory pruning for SAM2 state
     """
     logger.info("Starting Stage 1: Create transparent video")
+    # Start heartbeat monitor
+    heartbeat_flag = {"running": True}
+    heartbeat_thread = threading.Thread(
+        target=heartbeat_monitor,
+        args=(heartbeat_flag, 8.0),
+        daemon=True
+    )
+    heartbeat_thread.start()
+    # Initialize VRAM controller
+    vram_ctrl = VRAMAdaptiveController()
     # Check memory
     memory_info = get_memory_usage()
     if memory_info.get('gpu_free', 0) < 2.0:
         if sam2_predictor is None:
             st.error("Failed to load SAM2 model")
+            heartbeat_flag["running"] = False
             return None
+        # Try to optimize SAM2 model for T4
+        if hasattr(sam2_predictor, 'model') and sam2_predictor.model is not None:
+            try:
+                device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+                sam2_predictor.model = optimize_model_for_t4(sam2_predictor.model, device)
+            except Exception as e:
+                logger.warning(f"Could not optimize SAM2: {e}")
         update_progress(0.1, "Loading MatAnyone model...")
         matanyone_result = load_matanyone_processor()
         if matanyone_processor is None:
             st.error("Failed to load MatAnyone model")
+            heartbeat_flag["running"] = False
             return None
+        # Try to optimize MatAnyone model for T4
+        if hasattr(matanyone_processor, 'model') and matanyone_processor.model is not None:
+            try:
+                device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+                matanyone_processor.model = optimize_model_for_t4(matanyone_processor.model, device)
+            except Exception as e:
+                logger.warning(f"Could not optimize MatAnyone: {e}")
         # Process video
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_dir = Path(temp_dir)
             if mask is None:
                 st.error("Failed to generate mask")
+                heartbeat_flag["running"] = False
                 return None
             mask_path = str(temp_dir / "mask.png")
                     update_progress(1.0, "Transparent video created successfully")
                     time.sleep(0.5)
+                    heartbeat_flag["running"] = False
                     return str(persist_path)
                 else:
                     st.error("Failed to create transparent video")
+                    heartbeat_flag["running"] = False
                     return None
             except Exception as e:
                 logger.error(f"MatAnyone processing failed: {e}", exc_info=True)
                 st.error(f"MatAnyone processing failed: {e}")
+                heartbeat_flag["running"] = False
                 return None
     except Exception as e:
         except:
             pass
+        heartbeat_flag["running"] = False
         return None
     finally:
+        heartbeat_flag["running"] = False
         logger.info("Stage 1 cleanup...")
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
         gc.collect()
+# ==================================================================================
 # STAGE 2: Composite with Background
+# ==================================================================================
 def stage2_composite_background(transparent_video_path, background, bg_type):
     """