Update pipeline/video_pipeline.py
Browse files- pipeline/video_pipeline.py +28 -45
pipeline/video_pipeline.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
- Optimized for T4 GPU with memory management and fallbacks.
|
| 7 |
- Preserves audio from input video in final output.
|
| 8 |
"""
|
|
|
|
| 9 |
import os
|
| 10 |
import time
|
| 11 |
import tempfile
|
|
@@ -19,25 +20,13 @@
|
|
| 19 |
import numpy as np
|
| 20 |
from collections import deque
|
| 21 |
import torch
|
|
|
|
|
|
|
| 22 |
import streamlit as st
|
| 23 |
-
from models.model_loaders import (
|
| 24 |
-
torch_memory_manager,
|
| 25 |
-
get_memory_usage,
|
| 26 |
-
clear_model_cache
|
| 27 |
-
)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
logger = logging.getLogger(__name__)
|
| 31 |
logging.basicConfig(level=logging.INFO)
|
| 32 |
|
| 33 |
-
def check_gpu(logger):
|
| 34 |
-
"""Check if GPU is available and log memory usage."""
|
| 35 |
-
if torch.cuda.is_available():
|
| 36 |
-
logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
|
| 37 |
-
return True
|
| 38 |
-
logger.warning("CUDA is NOT available. Falling back to CPU.")
|
| 39 |
-
return False
|
| 40 |
-
|
| 41 |
# --- T4 GPU Optimizations ---
|
| 42 |
def setup_t4_environment():
|
| 43 |
"""Configure PyTorch and CUDA for Tesla T4"""
|
|
@@ -68,26 +57,6 @@ def heartbeat_monitor(running_flag: dict, interval: float = 8.0):
|
|
| 68 |
print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
|
| 69 |
time.sleep(interval)
|
| 70 |
|
| 71 |
-
# --- VRAM Management ---
class VRAMAdaptiveController:
    """Adjusts memory usage based on available VRAM"""

    def __init__(self):
        # Defaults tuned for a T4-class GPU; adapt() nudges them at runtime.
        self.memory_window = 96   # frames kept in the processing window
        self.cleanup_every = 20   # frames between cache cleanups

    def adapt(self):
        """Adjust parameters based on current VRAM availability"""
        if not torch.cuda.is_available():
            # CPU-only run: nothing to adapt.
            return
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        free_gb = free_bytes / (1024 ** 3)
        if free_gb < 1.6:
            # Memory pressure: shrink the window and clean up more often
            # (floors of 48 / 12 keep processing viable).
            self.memory_window = max(48, self.memory_window - 8)
            self.cleanup_every = max(12, self.cleanup_every - 2)
            logger.warning(f"Low VRAM ({free_gb:.2f}GB) → Reduced window to {self.memory_window}")
        elif free_gb > 3.0:
            # Plenty of headroom: grow back gradually (caps of 128 / 40).
            self.memory_window = min(128, self.memory_window + 4)
            self.cleanup_every = min(40, self.cleanup_every + 2)
| 91 |
# --- Audio Extraction ---
|
| 92 |
def extract_audio(input_video_path, output_audio_path):
|
| 93 |
"""Extract audio from input video using FFmpeg"""
|
|
@@ -129,6 +98,7 @@ def _normalize_input(inp, work_dir: Path) -> str:
|
|
| 129 |
return inp
|
| 130 |
target = work_dir / "input.mp4"
|
| 131 |
if hasattr(inp, "read"):
|
|
|
|
| 132 |
with open(target, "wb") as f:
|
| 133 |
f.write(inp.read())
|
| 134 |
else:
|
|
@@ -147,7 +117,7 @@ def generate_first_frame_mask(video_path, predictor):
|
|
| 147 |
if max(h, w) > 1080:
|
| 148 |
scale = 1080 / max(h, w)
|
| 149 |
frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
|
| 150 |
-
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
|
| 151 |
predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 152 |
masks, scores, _ = predictor.predict(
|
| 153 |
point_coords=np.array([[w//2, h//2]]),
|
|
@@ -202,7 +172,7 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
|
|
| 202 |
# Verify alpha channel
|
| 203 |
cap = cv2.VideoCapture(output_path)
|
| 204 |
ret, frame = cap.read()
|
| 205 |
-
if ret:
|
| 206 |
logger.info(f"[create_transparent_mov] FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
|
| 207 |
else:
|
| 208 |
logger.error("[create_transparent_mov] Failed to read output video")
|
|
@@ -240,7 +210,7 @@ def stage1_create_transparent_video(input_file, sam2_predictor, matanyone_proces
|
|
| 240 |
mask_path = str(temp_dir / "mask.png")
|
| 241 |
cv2.imwrite(mask_path, mask)
|
| 242 |
logger.info(f"[stage1] First-frame mask saved: {mask_path}")
|
| 243 |
-
# MatAnyone processing
|
| 244 |
foreground_path, alpha_path = matanyone_processor.process_video(
|
| 245 |
input_path=input_path,
|
| 246 |
mask_path=mask_path,
|
|
@@ -259,6 +229,7 @@ def stage1_create_transparent_video(input_file, sam2_predictor, matanyone_proces
|
|
| 259 |
raise RuntimeError("Transparent MOV creation failed")
|
| 260 |
# Save to persistent storage
|
| 261 |
persist_path = Path("tmp") / "transparent_video.mov"
|
|
|
|
| 262 |
shutil.copyfile(transparent_path, persist_path)
|
| 263 |
logger.info(f"[stage1] Transparent video saved: {persist_path}")
|
| 264 |
# Return both transparent video and audio paths for Stage 2
|
|
@@ -283,10 +254,13 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 283 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 284 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 285 |
# Prepare background
|
| 286 |
-
if bg_type == "image":
|
| 287 |
-
bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
|
| 288 |
-
else: #
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
| 290 |
bg_resized = cv2.resize(bg_array, (width, height))
|
| 291 |
# Composite frames (no audio yet)
|
| 292 |
temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
|
|
@@ -300,7 +274,7 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 300 |
bgr, alpha = frame[:, :, :3], frame[:, :, 3:4] / 255.0
|
| 301 |
composite = (bgr * alpha + bg_resized * (1 - alpha)).astype(np.uint8)
|
| 302 |
else:
|
| 303 |
-
composite = frame
|
| 304 |
out.write(composite)
|
| 305 |
cap.release()
|
| 306 |
out.release()
|
|
@@ -311,7 +285,7 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 311 |
if not success:
|
| 312 |
logger.warning("Audio muxing failed, returning video without audio")
|
| 313 |
return temp_output_path
|
| 314 |
-
os.remove(temp_output_path)
|
| 315 |
return final_output_path
|
| 316 |
else:
|
| 317 |
logger.warning("No audio found, returning video without audio")
|
|
@@ -321,5 +295,14 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 321 |
st.error(f"Stage 2 Error: {str(e)}")
|
| 322 |
return None
|
| 323 |
|
| 324 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
setup_t4_environment()
|
|
|
|
| 6 |
- Optimized for T4 GPU with memory management and fallbacks.
|
| 7 |
- Preserves audio from input video in final output.
|
| 8 |
"""
|
| 9 |
+
|
| 10 |
import os
|
| 11 |
import time
|
| 12 |
import tempfile
|
|
|
|
| 20 |
import numpy as np
|
| 21 |
from collections import deque
|
| 22 |
import torch
|
| 23 |
+
from PIL import Image
|
| 24 |
+
|
| 25 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
logger = logging.getLogger("Advanced Video Background Replacer")
|
|
|
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# --- T4 GPU Optimizations ---
|
| 31 |
def setup_t4_environment():
|
| 32 |
"""Configure PyTorch and CUDA for Tesla T4"""
|
|
|
|
| 57 |
print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
|
| 58 |
time.sleep(interval)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# --- Audio Extraction ---
|
| 61 |
def extract_audio(input_video_path, output_audio_path):
|
| 62 |
"""Extract audio from input video using FFmpeg"""
|
|
|
|
| 98 |
return inp
|
| 99 |
target = work_dir / "input.mp4"
|
| 100 |
if hasattr(inp, "read"):
|
| 101 |
+
inp.seek(0)
|
| 102 |
with open(target, "wb") as f:
|
| 103 |
f.write(inp.read())
|
| 104 |
else:
|
|
|
|
| 117 |
if max(h, w) > 1080:
|
| 118 |
scale = 1080 / max(h, w)
|
| 119 |
frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
|
| 120 |
+
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32):
|
| 121 |
predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 122 |
masks, scores, _ = predictor.predict(
|
| 123 |
point_coords=np.array([[w//2, h//2]]),
|
|
|
|
| 172 |
# Verify alpha channel
|
| 173 |
cap = cv2.VideoCapture(output_path)
|
| 174 |
ret, frame = cap.read()
|
| 175 |
+
if ret and frame.shape[-1] == 4:
|
| 176 |
logger.info(f"[create_transparent_mov] FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
|
| 177 |
else:
|
| 178 |
logger.error("[create_transparent_mov] Failed to read output video")
|
|
|
|
| 210 |
mask_path = str(temp_dir / "mask.png")
|
| 211 |
cv2.imwrite(mask_path, mask)
|
| 212 |
logger.info(f"[stage1] First-frame mask saved: {mask_path}")
|
| 213 |
+
# MatAnyone processing (should return paths to RGBA and alpha videos)
|
| 214 |
foreground_path, alpha_path = matanyone_processor.process_video(
|
| 215 |
input_path=input_path,
|
| 216 |
mask_path=mask_path,
|
|
|
|
| 229 |
raise RuntimeError("Transparent MOV creation failed")
|
| 230 |
# Save to persistent storage
|
| 231 |
persist_path = Path("tmp") / "transparent_video.mov"
|
| 232 |
+
persist_path.parent.mkdir(parents=True, exist_ok=True)
|
| 233 |
shutil.copyfile(transparent_path, persist_path)
|
| 234 |
logger.info(f"[stage1] Transparent video saved: {persist_path}")
|
| 235 |
# Return both transparent video and audio paths for Stage 2
|
|
|
|
| 254 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 255 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 256 |
# Prepare background
|
| 257 |
+
if bg_type.lower() == "image" and isinstance(background, Image.Image):
|
| 258 |
+
bg_array = cv2.cvtColor(np.array(background.resize((width, height))), cv2.COLOR_RGB2BGR)
|
| 259 |
+
else: # Color, e.g. "#00FF00"
|
| 260 |
+
color_rgb = (0,255,0)
|
| 261 |
+
if isinstance(background, str) and background.startswith("#"):
|
| 262 |
+
color_rgb = tuple(int(background.lstrip("#")[i:i+2], 16) for i in (0, 2, 4))
|
| 263 |
+
bg_array = np.full((height, width, 3), color_rgb, dtype=np.uint8)
|
| 264 |
bg_resized = cv2.resize(bg_array, (width, height))
|
| 265 |
# Composite frames (no audio yet)
|
| 266 |
temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
|
|
|
|
| 274 |
bgr, alpha = frame[:, :, :3], frame[:, :, 3:4] / 255.0
|
| 275 |
composite = (bgr * alpha + bg_resized * (1 - alpha)).astype(np.uint8)
|
| 276 |
else:
|
| 277 |
+
composite = frame # Fallback: no alpha
|
| 278 |
out.write(composite)
|
| 279 |
cap.release()
|
| 280 |
out.release()
|
|
|
|
| 285 |
if not success:
|
| 286 |
logger.warning("Audio muxing failed, returning video without audio")
|
| 287 |
return temp_output_path
|
| 288 |
+
os.remove(temp_output_path) # Clean up temp file
|
| 289 |
return final_output_path
|
| 290 |
else:
|
| 291 |
logger.warning("No audio found, returning video without audio")
|
|
|
|
| 295 |
st.error(f"Stage 2 Error: {str(e)}")
|
| 296 |
return None
|
| 297 |
|
| 298 |
+
# --- Helper for GPU check (optional for UI/session) ---
def check_gpu(logger):
    """Check if GPU is available and log memory usage."""
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        allocated_gb = torch.cuda.memory_allocated() / 1e9
        logger.info(f"CUDA is available. Allocated: {allocated_gb:.2f} GB")
    else:
        logger.warning("CUDA is NOT available. Falling back to CPU.")
    return has_cuda
|
| 306 |
+
|
| 307 |
+
# --- Initialize T4 tuning immediately if imported as module ---
|
| 308 |
setup_t4_environment()
|