Spaces:

MogensR
/

VideoBackgroundReplacer2

Paused

App Files Files Community

MogensR committed on Oct 3, 2025

Commit

fed9904

verified ·

1 Parent(s): 00e27de

Update pipeline/video_pipeline.py

Browse files

Files changed (1) hide show

pipeline/video_pipeline.py +8 -24

pipeline/video_pipeline.py CHANGED Viewed

@@ -32,6 +32,14 @@
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 # --- T4 GPU Optimizations ---
 def setup_t4_environment():
     """Configure PyTorch and CUDA for Tesla T4"""
@@ -68,7 +76,6 @@ class VRAMAdaptiveController:
     def __init__(self):
         self.memory_window = 96
         self.cleanup_every = 20
     def adapt(self):
         """Adjust parameters based on current VRAM availability"""
         if not torch.cuda.is_available():
@@ -138,12 +145,10 @@ def generate_first_frame_mask(video_path, predictor):
     cap.release()
     if not ret:
         raise ValueError("Failed to read video frame")
     h, w = frame.shape[:2]
     if max(h, w) > 1080:
         scale = 1080 / max(h, w)
         frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
         predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         masks, _, _ = predictor.predict(
@@ -160,11 +165,9 @@ def smooth_alpha_video(alpha_path, output_path, window_size=5):
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
     frame_buffer = deque(maxlen=window_size)
     while True:
         ret, frame = cap.read()
         if not ret:
@@ -174,7 +177,6 @@ def smooth_alpha_video(alpha_path, output_path, window_size=5):
         frame_buffer.append(frame.astype(np.float32))
         smoothed = np.mean(frame_buffer, axis=0).astype(np.uint8)
         out.write(smoothed)
     cap.release()
     out.release()
     return output_path
@@ -195,7 +197,6 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
             output_path
         ]
         subprocess.run(cmd, check=True, capture_output=True)
         # Verify alpha channel
         cap = cv2.VideoCapture(output_path)
         ret, frame = cap.read()
@@ -203,7 +204,6 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
             logger.info(f"FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
         cap.release()
         return output_path
     except Exception as e:
         logger.error(f"FFmpeg MOV creation failed: {e}")
         return None
@@ -214,28 +214,23 @@ def stage1_create_transparent_video(input_file):
     logger.info("Stage 1: Creating transparent video")
     heartbeat_flag = {"running": True}
     threading.Thread(target=heartbeat_monitor, args=(heartbeat_flag,), daemon=True).start()
     try:
         # Load models
         sam2_predictor = load_sam2_predictor()
         matanyone_processor = load_matanyone_processor()
         if not sam2_predictor or not matanyone_processor:
             raise RuntimeError("Failed to load models")
         # Process video
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_dir = Path(temp_dir)
             input_path = _normalize_input(input_file, temp_dir)
             # Extract audio from input video
             audio_path = str(temp_dir / "audio.aac")
             extract_audio(input_path, audio_path)
             # Generate first-frame mask
             mask = generate_first_frame_mask(input_path, sam2_predictor)
             mask_path = str(temp_dir / "mask.png")
             cv2.imwrite(mask_path, mask)
             # MatAnyone processing
             foreground_path, alpha_path = matanyone_processor.process_video(
                 input_path=input_path,
@@ -243,22 +238,17 @@ def stage1_create_transparent_video(input_file):
                 output_path=str(temp_dir),
                 max_size=720
             )
             # Temporal smoothing
             smoothed_alpha = smooth_alpha_video(alpha_path, str(temp_dir / "alpha_smoothed.mp4"))
             # Create transparent MOV
             transparent_path = create_transparent_mov(foreground_path, smoothed_alpha, temp_dir)
             if not transparent_path:
                 raise RuntimeError("Transparent MOV creation failed")
             # Save to persistent storage
             persist_path = Path("tmp") / "transparent_video.mov"
             shutil.copyfile(transparent_path, persist_path)
             # Return both transparent video and audio paths for Stage 2
             return str(persist_path), audio_path
     except Exception as e:
         logger.error(f"Stage 1 failed: {e}", exc_info=True)
         st.error(f"Stage 1 Error: {str(e)}")
@@ -278,19 +268,16 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
         fps = cap.get(cv2.CAP_PROP_FPS)
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         # Prepare background
         if bg_type == "image":
             bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
         else:  # color
             bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
         bg_resized = cv2.resize(bg_array, (width, height))
         # Composite frames (no audio yet)
         temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
         while True:
             ret, frame = cap.read()
             if not ret:
@@ -301,10 +288,8 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
             else:
                 composite = frame  # Fallback: no alpha
             out.write(composite)
         cap.release()
         out.release()
         # Mux audio back into the final video
         final_output_path = str(Path("tmp") / "final_output.mp4")
         if audio_path and os.path.exists(audio_path):
@@ -317,7 +302,6 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
         else:
             logger.warning("No audio found, returning video without audio")
             return temp_output_path
     except Exception as e:
         logger.error(f"Stage 2 failed: {e}", exc_info=True)
         st.error(f"Stage 2 Error: {str(e)}")

 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
+def check_gpu():
+    """Report whether CUDA is available, logging the outcome.
+
+    When CUDA is available, the value of ``torch.cuda.memory_allocated()``
+    divided by 1e9 is logged at INFO level as a GB figure with two decimals.
+    When it is not available, a WARNING about falling back to CPU is logged
+    and no memory figure is reported.
+
+    Returns:
+        True if ``torch.cuda.is_available()`` is true, otherwise False.
+    """
+    if torch.cuda.is_available():
+        logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
+        return True
+    # No CUDA: this function only warns and reports False; it does not itself
+    # move anything to the CPU, so callers must act on the return value.
+    logger.warning("CUDA is NOT available. Falling back to CPU.")
+    return False
 # --- T4 GPU Optimizations ---
 def setup_t4_environment():
     """Configure PyTorch and CUDA for Tesla T4"""
     def __init__(self):
         self.memory_window = 96
         self.cleanup_every = 20
     def adapt(self):
         """Adjust parameters based on current VRAM availability"""
         if not torch.cuda.is_available():
     cap.release()
     if not ret:
         raise ValueError("Failed to read video frame")
     h, w = frame.shape[:2]
     if max(h, w) > 1080:
         scale = 1080 / max(h, w)
         frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
         predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         masks, _, _ = predictor.predict(
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
     frame_buffer = deque(maxlen=window_size)
     while True:
         ret, frame = cap.read()
         if not ret:
         frame_buffer.append(frame.astype(np.float32))
         smoothed = np.mean(frame_buffer, axis=0).astype(np.uint8)
         out.write(smoothed)
     cap.release()
     out.release()
     return output_path
             output_path
         ]
         subprocess.run(cmd, check=True, capture_output=True)
         # Verify alpha channel
         cap = cv2.VideoCapture(output_path)
         ret, frame = cap.read()
             logger.info(f"FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
         cap.release()
         return output_path
     except Exception as e:
         logger.error(f"FFmpeg MOV creation failed: {e}")
         return None
     logger.info("Stage 1: Creating transparent video")
     heartbeat_flag = {"running": True}
     threading.Thread(target=heartbeat_monitor, args=(heartbeat_flag,), daemon=True).start()
     try:
         # Load models
         sam2_predictor = load_sam2_predictor()
         matanyone_processor = load_matanyone_processor()
         if not sam2_predictor or not matanyone_processor:
             raise RuntimeError("Failed to load models")
         # Process video
         with tempfile.TemporaryDirectory() as temp_dir:
             temp_dir = Path(temp_dir)
             input_path = _normalize_input(input_file, temp_dir)
             # Extract audio from input video
             audio_path = str(temp_dir / "audio.aac")
             extract_audio(input_path, audio_path)
             # Generate first-frame mask
             mask = generate_first_frame_mask(input_path, sam2_predictor)
             mask_path = str(temp_dir / "mask.png")
             cv2.imwrite(mask_path, mask)
             # MatAnyone processing
             foreground_path, alpha_path = matanyone_processor.process_video(
                 input_path=input_path,
                 output_path=str(temp_dir),
                 max_size=720
             )
             # Temporal smoothing
             smoothed_alpha = smooth_alpha_video(alpha_path, str(temp_dir / "alpha_smoothed.mp4"))
             # Create transparent MOV
             transparent_path = create_transparent_mov(foreground_path, smoothed_alpha, temp_dir)
             if not transparent_path:
                 raise RuntimeError("Transparent MOV creation failed")
             # Save to persistent storage
             persist_path = Path("tmp") / "transparent_video.mov"
             shutil.copyfile(transparent_path, persist_path)
             # Return both transparent video and audio paths for Stage 2
             return str(persist_path), audio_path
     except Exception as e:
         logger.error(f"Stage 1 failed: {e}", exc_info=True)
         st.error(f"Stage 1 Error: {str(e)}")
         fps = cap.get(cv2.CAP_PROP_FPS)
         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
         # Prepare background
         if bg_type == "image":
             bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
         else:  # color
             bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
         bg_resized = cv2.resize(bg_array, (width, height))
         # Composite frames (no audio yet)
         temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
         fourcc = cv2.VideoWriter_fourcc(*'mp4v')
         out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
         while True:
             ret, frame = cap.read()
             if not ret:
             else:
                 composite = frame  # Fallback: no alpha
             out.write(composite)
         cap.release()
         out.release()
         # Mux audio back into the final video
         final_output_path = str(Path("tmp") / "final_output.mp4")
         if audio_path and os.path.exists(audio_path):
         else:
             logger.warning("No audio found, returning video without audio")
             return temp_output_path
     except Exception as e:
         logger.error(f"Stage 2 failed: {e}", exc_info=True)
         st.error(f"Stage 2 Error: {str(e)}")