Update pipeline/video_pipeline.py
Browse files- pipeline/video_pipeline.py +28 -45
pipeline/video_pipeline.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
| 6 |
- Optimized for T4 GPU with memory management and fallbacks.
|
| 7 |
- Preserves audio from input video in final output.
|
| 8 |
"""
|
|
|
|
| 9 |
import os
|
| 10 |
import time
|
| 11 |
import tempfile
|
|
@@ -19,25 +20,13 @@
|
|
| 19 |
import numpy as np
|
| 20 |
from collections import deque
|
| 21 |
import torch
|
|
|
|
|
|
|
| 22 |
import streamlit as st
|
| 23 |
-
from models.model_loaders import (
|
| 24 |
-
torch_memory_manager,
|
| 25 |
-
get_memory_usage,
|
| 26 |
-
clear_model_cache
|
| 27 |
-
)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
logger = logging.getLogger(__name__)
|
| 31 |
logging.basicConfig(level=logging.INFO)
|
| 32 |
|
| 33 |
-
def check_gpu(logger):
|
| 34 |
-
"""Check if GPU is available and log memory usage."""
|
| 35 |
-
if torch.cuda.is_available():
|
| 36 |
-
logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
|
| 37 |
-
return True
|
| 38 |
-
logger.warning("CUDA is NOT available. Falling back to CPU.")
|
| 39 |
-
return False
|
| 40 |
-
|
| 41 |
# --- T4 GPU Optimizations ---
|
| 42 |
def setup_t4_environment():
|
| 43 |
"""Configure PyTorch and CUDA for Tesla T4"""
|
|
@@ -68,26 +57,6 @@ def heartbeat_monitor(running_flag: dict, interval: float = 8.0):
|
|
| 68 |
print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
|
| 69 |
time.sleep(interval)
|
| 70 |
|
| 71 |
-
# --- VRAM Management ---
class VRAMAdaptiveController:
    """Adjusts memory usage based on available VRAM"""

    def __init__(self):
        # Defaults tuned for a T4-class GPU; adapt() nudges them at runtime.
        self.memory_window = 96   # frames kept in the processing window
        self.cleanup_every = 20   # frames between cache cleanups

    def adapt(self):
        """Adjust parameters based on current VRAM availability"""
        if not torch.cuda.is_available():
            # CPU-only run: nothing to adapt.
            return
        free_bytes, _total_bytes = torch.cuda.mem_get_info()
        free_gb = free_bytes / (1024 ** 3)
        if free_gb < 1.6:
            # Memory pressure: shrink the window and clean up more often
            # (floors of 48 / 12 keep processing viable).
            self.memory_window = max(48, self.memory_window - 8)
            self.cleanup_every = max(12, self.cleanup_every - 2)
            logger.warning(f"Low VRAM ({free_gb:.2f}GB) → Reduced window to {self.memory_window}")
        elif free_gb > 3.0:
            # Plenty of headroom: grow back gradually (caps of 128 / 40).
            self.memory_window = min(128, self.memory_window + 4)
            self.cleanup_every = min(40, self.cleanup_every + 2)
| 91 |
# --- Audio Extraction ---
|
| 92 |
def extract_audio(input_video_path, output_audio_path):
|
| 93 |
"""Extract audio from input video using FFmpeg"""
|
|
@@ -129,6 +98,7 @@ def _normalize_input(inp, work_dir: Path) -> str:
|
|
| 129 |
return inp
|
| 130 |
target = work_dir / "input.mp4"
|
| 131 |
if hasattr(inp, "read"):
|
|
|
|
| 132 |
with open(target, "wb") as f:
|
| 133 |
f.write(inp.read())
|
| 134 |
else:
|
|
@@ -147,7 +117,7 @@ def generate_first_frame_mask(video_path, predictor):
|
|
| 147 |
if max(h, w) > 1080:
|
| 148 |
scale = 1080 / max(h, w)
|
| 149 |
frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
|
| 150 |
-
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
|
| 151 |
predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 152 |
masks, scores, _ = predictor.predict(
|
| 153 |
point_coords=np.array([[w//2, h//2]]),
|
|
@@ -202,7 +172,7 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
|
|
| 202 |
# Verify alpha channel
|
| 203 |
cap = cv2.VideoCapture(output_path)
|
| 204 |
ret, frame = cap.read()
|
| 205 |
-
if ret:
|
| 206 |
logger.info(f"[create_transparent_mov] FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
|
| 207 |
else:
|
| 208 |
logger.error("[create_transparent_mov] Failed to read output video")
|
|
@@ -240,7 +210,7 @@ def stage1_create_transparent_video(input_file, sam2_predictor, matanyone_proces
|
|
| 240 |
mask_path = str(temp_dir / "mask.png")
|
| 241 |
cv2.imwrite(mask_path, mask)
|
| 242 |
logger.info(f"[stage1] First-frame mask saved: {mask_path}")
|
| 243 |
-
# MatAnyone processing
|
| 244 |
foreground_path, alpha_path = matanyone_processor.process_video(
|
| 245 |
input_path=input_path,
|
| 246 |
mask_path=mask_path,
|
|
@@ -259,6 +229,7 @@ def stage1_create_transparent_video(input_file, sam2_predictor, matanyone_proces
|
|
| 259 |
raise RuntimeError("Transparent MOV creation failed")
|
| 260 |
# Save to persistent storage
|
| 261 |
persist_path = Path("tmp") / "transparent_video.mov"
|
|
|
|
| 262 |
shutil.copyfile(transparent_path, persist_path)
|
| 263 |
logger.info(f"[stage1] Transparent video saved: {persist_path}")
|
| 264 |
# Return both transparent video and audio paths for Stage 2
|
|
@@ -283,10 +254,13 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 283 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 284 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 285 |
# Prepare background
|
| 286 |
-
if bg_type == "image":
|
| 287 |
-
bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
|
| 288 |
-
else: #
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
| 290 |
bg_resized = cv2.resize(bg_array, (width, height))
|
| 291 |
# Composite frames (no audio yet)
|
| 292 |
temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
|
|
@@ -300,7 +274,7 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 300 |
bgr, alpha = frame[:, :, :3], frame[:, :, 3:4] / 255.0
|
| 301 |
composite = (bgr * alpha + bg_resized * (1 - alpha)).astype(np.uint8)
|
| 302 |
else:
|
| 303 |
-
composite = frame
|
| 304 |
out.write(composite)
|
| 305 |
cap.release()
|
| 306 |
out.release()
|
|
@@ -311,7 +285,7 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 311 |
if not success:
|
| 312 |
logger.warning("Audio muxing failed, returning video without audio")
|
| 313 |
return temp_output_path
|
| 314 |
-
os.remove(temp_output_path)
|
| 315 |
return final_output_path
|
| 316 |
else:
|
| 317 |
logger.warning("No audio found, returning video without audio")
|
|
@@ -321,5 +295,14 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
|
|
| 321 |
st.error(f"Stage 2 Error: {str(e)}")
|
| 322 |
return None
|
| 323 |
|
| 324 |
-
# ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
setup_t4_environment()
|
|
|
|
| 6 |
- Optimized for T4 GPU with memory management and fallbacks.
|
| 7 |
- Preserves audio from input video in final output.
|
| 8 |
"""
|
| 9 |
+
|
| 10 |
import os
|
| 11 |
import time
|
| 12 |
import tempfile
|
|
|
|
| 20 |
import numpy as np
|
| 21 |
from collections import deque
|
| 22 |
import torch
|
| 23 |
+
from PIL import Image
|
| 24 |
+
|
| 25 |
import streamlit as st
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
+
logger = logging.getLogger("Advanced Video Background Replacer")
|
|
|
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
# --- T4 GPU Optimizations ---
|
| 31 |
def setup_t4_environment():
|
| 32 |
"""Configure PyTorch and CUDA for Tesla T4"""
|
|
|
|
| 57 |
print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
|
| 58 |
time.sleep(interval)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
# --- Audio Extraction ---
|
| 61 |
def extract_audio(input_video_path, output_audio_path):
|
| 62 |
"""Extract audio from input video using FFmpeg"""
|
|
|
|
| 98 |
return inp
|
| 99 |
target = work_dir / "input.mp4"
|
| 100 |
if hasattr(inp, "read"):
|
| 101 |
+
inp.seek(0)
|
| 102 |
with open(target, "wb") as f:
|
| 103 |
f.write(inp.read())
|
| 104 |
else:
|
|
|
|
| 117 |
if max(h, w) > 1080:
|
| 118 |
scale = 1080 / max(h, w)
|
| 119 |
frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
|
| 120 |
+
with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32):
|
| 121 |
predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
|
| 122 |
masks, scores, _ = predictor.predict(
|
| 123 |
point_coords=np.array([[w//2, h//2]]),
|
|
|
|
| 172 |
# Verify alpha channel
|
| 173 |
cap = cv2.VideoCapture(output_path)
|
| 174 |
ret, frame = cap.read()
|
| 175 |
+
if ret and frame.shape[-1] == 4:
|
| 176 |
logger.info(f"[create_transparent_mov] FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
|
| 177 |
else:
|
| 178 |
logger.error("[create_transparent_mov] Failed to read output video")
|
|
|
|
| 210 |
mask_path = str(temp_dir / "mask.png")
|
| 211 |
cv2.imwrite(mask_path, mask)
|
| 212 |
logger.info(f"[stage1] First-frame mask saved: {mask_path}")
|
| 213 |
+
# MatAnyone processing (should return paths to RGBA and alpha videos)
|
| 214 |
foreground_path, alpha_path = matanyone_processor.process_video(
|
| 215 |
input_path=input_path,
|
| 216 |
mask_path=mask_path,
|
|
|
|
| 229 |
raise RuntimeError("Transparent MOV creation failed")
|
| 230 |
# Save to persistent storage
|
| 231 |
persist_path = Path("tmp") / "transparent_video.mov"
|
| 232 |
+
persist_path.parent.mkdir(parents=True, exist_ok=True)
|
| 233 |
shutil.copyfile(transparent_path, persist_path)
|
| 234 |
logger.info(f"[stage1] Transparent video saved: {persist_path}")
|
| 235 |
# Return both transparent video and audio paths for Stage 2
|
|
|
|
| 254 |
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 255 |
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 256 |
# Prepare background
|
| 257 |
+
if bg_type.lower() == "image" and isinstance(background, Image.Image):
|
| 258 |
+
bg_array = cv2.cvtColor(np.array(background.resize((width, height))), cv2.COLOR_RGB2BGR)
|
| 259 |
+
else: # Color, e.g. "#00FF00"
|
| 260 |
+
color_rgb = (0,255,0)
|
| 261 |
+
if isinstance(background, str) and background.startswith("#"):
|
| 262 |
+
color_rgb = tuple(int(background.lstrip("#")[i:i+2], 16) for i in (0, 2, 4))
|
| 263 |
+
bg_array = np.full((height, width, 3), color_rgb, dtype=np.uint8)
|
| 264 |
bg_resized = cv2.resize(bg_array, (width, height))
|
| 265 |
# Composite frames (no audio yet)
|
| 266 |
temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
|
|
|
|
| 274 |
bgr, alpha = frame[:, :, :3], frame[:, :, 3:4] / 255.0
|
| 275 |
composite = (bgr * alpha + bg_resized * (1 - alpha)).astype(np.uint8)
|
| 276 |
else:
|
| 277 |
+
composite = frame # Fallback: no alpha
|
| 278 |
out.write(composite)
|
| 279 |
cap.release()
|
| 280 |
out.release()
|
|
|
|
| 285 |
if not success:
|
| 286 |
logger.warning("Audio muxing failed, returning video without audio")
|
| 287 |
return temp_output_path
|
| 288 |
+
os.remove(temp_output_path) # Clean up temp file
|
| 289 |
return final_output_path
|
| 290 |
else:
|
| 291 |
logger.warning("No audio found, returning video without audio")
|
|
|
|
| 295 |
st.error(f"Stage 2 Error: {str(e)}")
|
| 296 |
return None
|
| 297 |
|
| 298 |
+
# --- Helper for GPU check (optional for UI/session) ---
def check_gpu(logger):
    """Check if GPU is available and log memory usage."""
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        allocated_gb = torch.cuda.memory_allocated() / 1e9
        logger.info(f"CUDA is available. Allocated: {allocated_gb:.2f} GB")
    else:
        logger.warning("CUDA is NOT available. Falling back to CPU.")
    return has_cuda
|
| 306 |
+
|
| 307 |
+
# --- Initialize T4 tuning immediately if imported as module ---
|
| 308 |
setup_t4_environment()
|