MogensR committed on
Commit
b04093d
·
verified ·
1 Parent(s): 68bff62

Update pipeline/video_pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline/video_pipeline.py +28 -45
pipeline/video_pipeline.py CHANGED
@@ -6,6 +6,7 @@
6
  - Optimized for T4 GPU with memory management and fallbacks.
7
  - Preserves audio from input video in final output.
8
  """
 
9
  import os
10
  import time
11
  import tempfile
@@ -19,25 +20,13 @@
19
  import numpy as np
20
  from collections import deque
21
  import torch
 
 
22
  import streamlit as st
23
- from models.model_loaders import (
24
- torch_memory_manager,
25
- get_memory_usage,
26
- clear_model_cache
27
- )
28
 
29
- # --- Logging Setup ---
30
- logger = logging.getLogger(__name__)
31
  logging.basicConfig(level=logging.INFO)
32
 
33
- def check_gpu(logger):
34
- """Check if GPU is available and log memory usage."""
35
- if torch.cuda.is_available():
36
- logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
37
- return True
38
- logger.warning("CUDA is NOT available. Falling back to CPU.")
39
- return False
40
-
41
  # --- T4 GPU Optimizations ---
42
  def setup_t4_environment():
43
  """Configure PyTorch and CUDA for Tesla T4"""
@@ -68,26 +57,6 @@ def heartbeat_monitor(running_flag: dict, interval: float = 8.0):
68
  print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
69
  time.sleep(interval)
70
 
71
- # --- VRAM Management ---
72
- class VRAMAdaptiveController:
73
- """Adjusts memory usage based on available VRAM"""
74
- def __init__(self):
75
- self.memory_window = 96
76
- self.cleanup_every = 20
77
- def adapt(self):
78
- """Adjust parameters based on current VRAM availability"""
79
- if not torch.cuda.is_available():
80
- return
81
- free, _ = torch.cuda.mem_get_info()
82
- free_gb = free / (1024 ** 3)
83
- if free_gb < 1.6:
84
- self.memory_window = max(48, self.memory_window - 8)
85
- self.cleanup_every = max(12, self.cleanup_every - 2)
86
- logger.warning(f"Low VRAM ({free_gb:.2f}GB) → Reduced window to {self.memory_window}")
87
- elif free_gb > 3.0:
88
- self.memory_window = min(128, self.memory_window + 4)
89
- self.cleanup_every = min(40, self.cleanup_every + 2)
90
-
91
  # --- Audio Extraction ---
92
  def extract_audio(input_video_path, output_audio_path):
93
  """Extract audio from input video using FFmpeg"""
@@ -129,6 +98,7 @@ def _normalize_input(inp, work_dir: Path) -> str:
129
  return inp
130
  target = work_dir / "input.mp4"
131
  if hasattr(inp, "read"):
 
132
  with open(target, "wb") as f:
133
  f.write(inp.read())
134
  else:
@@ -147,7 +117,7 @@ def generate_first_frame_mask(video_path, predictor):
147
  if max(h, w) > 1080:
148
  scale = 1080 / max(h, w)
149
  frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
150
- with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
151
  predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
152
  masks, scores, _ = predictor.predict(
153
  point_coords=np.array([[w//2, h//2]]),
@@ -202,7 +172,7 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
202
  # Verify alpha channel
203
  cap = cv2.VideoCapture(output_path)
204
  ret, frame = cap.read()
205
- if ret:
206
  logger.info(f"[create_transparent_mov] FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
207
  else:
208
  logger.error("[create_transparent_mov] Failed to read output video")
@@ -240,7 +210,7 @@ def stage1_create_transparent_video(input_file, sam2_predictor, matanyone_proces
240
  mask_path = str(temp_dir / "mask.png")
241
  cv2.imwrite(mask_path, mask)
242
  logger.info(f"[stage1] First-frame mask saved: {mask_path}")
243
- # MatAnyone processing
244
  foreground_path, alpha_path = matanyone_processor.process_video(
245
  input_path=input_path,
246
  mask_path=mask_path,
@@ -259,6 +229,7 @@ def stage1_create_transparent_video(input_file, sam2_predictor, matanyone_proces
259
  raise RuntimeError("Transparent MOV creation failed")
260
  # Save to persistent storage
261
  persist_path = Path("tmp") / "transparent_video.mov"
 
262
  shutil.copyfile(transparent_path, persist_path)
263
  logger.info(f"[stage1] Transparent video saved: {persist_path}")
264
  # Return both transparent video and audio paths for Stage 2
@@ -283,10 +254,13 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
283
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
284
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
285
  # Prepare background
286
- if bg_type == "image":
287
- bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
288
- else: # color
289
- bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
 
 
 
290
  bg_resized = cv2.resize(bg_array, (width, height))
291
  # Composite frames (no audio yet)
292
  temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
@@ -300,7 +274,7 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
300
  bgr, alpha = frame[:, :, :3], frame[:, :, 3:4] / 255.0
301
  composite = (bgr * alpha + bg_resized * (1 - alpha)).astype(np.uint8)
302
  else:
303
- composite = frame # Fallback: no alpha
304
  out.write(composite)
305
  cap.release()
306
  out.release()
@@ -311,7 +285,7 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
311
  if not success:
312
  logger.warning("Audio muxing failed, returning video without audio")
313
  return temp_output_path
314
- os.remove(temp_output_path) # Clean up temp file
315
  return final_output_path
316
  else:
317
  logger.warning("No audio found, returning video without audio")
@@ -321,5 +295,14 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
321
  st.error(f"Stage 2 Error: {str(e)}")
322
  return None
323
 
324
- # --- Initialize ---
 
 
 
 
 
 
 
 
 
325
  setup_t4_environment()
 
6
  - Optimized for T4 GPU with memory management and fallbacks.
7
  - Preserves audio from input video in final output.
8
  """
9
+
10
  import os
11
  import time
12
  import tempfile
 
20
  import numpy as np
21
  from collections import deque
22
  import torch
23
+ from PIL import Image
24
+
25
  import streamlit as st
 
 
 
 
 
26
 
27
+ logger = logging.getLogger("Advanced Video Background Replacer")
 
28
  logging.basicConfig(level=logging.INFO)
29
 
 
 
 
 
 
 
 
 
30
  # --- T4 GPU Optimizations ---
31
  def setup_t4_environment():
32
  """Configure PyTorch and CUDA for Tesla T4"""
 
57
  print(f"[HEARTBEAT] t={int(time.time())}", flush=True)
58
  time.sleep(interval)
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  # --- Audio Extraction ---
61
  def extract_audio(input_video_path, output_audio_path):
62
  """Extract audio from input video using FFmpeg"""
 
98
  return inp
99
  target = work_dir / "input.mp4"
100
  if hasattr(inp, "read"):
101
+ inp.seek(0)
102
  with open(target, "wb") as f:
103
  f.write(inp.read())
104
  else:
 
117
  if max(h, w) > 1080:
118
  scale = 1080 / max(h, w)
119
  frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
120
+ with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32):
121
  predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
122
  masks, scores, _ = predictor.predict(
123
  point_coords=np.array([[w//2, h//2]]),
 
172
  # Verify alpha channel
173
  cap = cv2.VideoCapture(output_path)
174
  ret, frame = cap.read()
175
+ if ret and frame.shape[-1] == 4:
176
  logger.info(f"[create_transparent_mov] FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
177
  else:
178
  logger.error("[create_transparent_mov] Failed to read output video")
 
210
  mask_path = str(temp_dir / "mask.png")
211
  cv2.imwrite(mask_path, mask)
212
  logger.info(f"[stage1] First-frame mask saved: {mask_path}")
213
+ # MatAnyone processing (should return paths to RGBA and alpha videos)
214
  foreground_path, alpha_path = matanyone_processor.process_video(
215
  input_path=input_path,
216
  mask_path=mask_path,
 
229
  raise RuntimeError("Transparent MOV creation failed")
230
  # Save to persistent storage
231
  persist_path = Path("tmp") / "transparent_video.mov"
232
+ persist_path.parent.mkdir(parents=True, exist_ok=True)
233
  shutil.copyfile(transparent_path, persist_path)
234
  logger.info(f"[stage1] Transparent video saved: {persist_path}")
235
  # Return both transparent video and audio paths for Stage 2
 
254
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
255
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
256
  # Prepare background
257
+ if bg_type.lower() == "image" and isinstance(background, Image.Image):
258
+ bg_array = cv2.cvtColor(np.array(background.resize((width, height))), cv2.COLOR_RGB2BGR)
259
+ else: # Color, e.g. "#00FF00"
260
+ color_rgb = (0,255,0)
261
+ if isinstance(background, str) and background.startswith("#"):
262
+ color_rgb = tuple(int(background.lstrip("#")[i:i+2], 16) for i in (0, 2, 4))
263
+ bg_array = np.full((height, width, 3), color_rgb, dtype=np.uint8)
264
  bg_resized = cv2.resize(bg_array, (width, height))
265
  # Composite frames (no audio yet)
266
  temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
 
274
  bgr, alpha = frame[:, :, :3], frame[:, :, 3:4] / 255.0
275
  composite = (bgr * alpha + bg_resized * (1 - alpha)).astype(np.uint8)
276
  else:
277
+ composite = frame # Fallback: no alpha
278
  out.write(composite)
279
  cap.release()
280
  out.release()
 
285
  if not success:
286
  logger.warning("Audio muxing failed, returning video without audio")
287
  return temp_output_path
288
+ os.remove(temp_output_path) # Clean up temp file
289
  return final_output_path
290
  else:
291
  logger.warning("No audio found, returning video without audio")
 
295
  st.error(f"Stage 2 Error: {str(e)}")
296
  return None
297
 
298
+ # --- Helper for GPU check (optional for UI/session) ---
299
+ def check_gpu(logger):
300
+ """Check if GPU is available and log memory usage."""
301
+ if torch.cuda.is_available():
302
+ logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
303
+ return True
304
+ logger.warning("CUDA is NOT available. Falling back to CPU.")
305
+ return False
306
+
307
+ # --- Initialize T4 tuning immediately if imported as module ---
308
  setup_t4_environment()