MogensR committed on
Commit
fed9904
·
verified ·
1 Parent(s): 00e27de

Update pipeline/video_pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline/video_pipeline.py +8 -24
pipeline/video_pipeline.py CHANGED
@@ -32,6 +32,14 @@
32
  logger = logging.getLogger(__name__)
33
  logging.basicConfig(level=logging.INFO)
34
 
 
 
 
 
 
 
 
 
35
  # --- T4 GPU Optimizations ---
36
  def setup_t4_environment():
37
  """Configure PyTorch and CUDA for Tesla T4"""
@@ -68,7 +76,6 @@ class VRAMAdaptiveController:
68
  def __init__(self):
69
  self.memory_window = 96
70
  self.cleanup_every = 20
71
-
72
  def adapt(self):
73
  """Adjust parameters based on current VRAM availability"""
74
  if not torch.cuda.is_available():
@@ -138,12 +145,10 @@ def generate_first_frame_mask(video_path, predictor):
138
  cap.release()
139
  if not ret:
140
  raise ValueError("Failed to read video frame")
141
-
142
  h, w = frame.shape[:2]
143
  if max(h, w) > 1080:
144
  scale = 1080 / max(h, w)
145
  frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
146
-
147
  with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
148
  predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
149
  masks, _, _ = predictor.predict(
@@ -160,11 +165,9 @@ def smooth_alpha_video(alpha_path, output_path, window_size=5):
160
  fps = cap.get(cv2.CAP_PROP_FPS)
161
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
162
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
163
-
164
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
165
  out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
166
  frame_buffer = deque(maxlen=window_size)
167
-
168
  while True:
169
  ret, frame = cap.read()
170
  if not ret:
@@ -174,7 +177,6 @@ def smooth_alpha_video(alpha_path, output_path, window_size=5):
174
  frame_buffer.append(frame.astype(np.float32))
175
  smoothed = np.mean(frame_buffer, axis=0).astype(np.uint8)
176
  out.write(smoothed)
177
-
178
  cap.release()
179
  out.release()
180
  return output_path
@@ -195,7 +197,6 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
195
  output_path
196
  ]
197
  subprocess.run(cmd, check=True, capture_output=True)
198
-
199
  # Verify alpha channel
200
  cap = cv2.VideoCapture(output_path)
201
  ret, frame = cap.read()
@@ -203,7 +204,6 @@ def create_transparent_mov(foreground_path, alpha_path, output_dir):
203
  logger.info(f"FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
204
  cap.release()
205
  return output_path
206
-
207
  except Exception as e:
208
  logger.error(f"FFmpeg MOV creation failed: {e}")
209
  return None
@@ -214,28 +214,23 @@ def stage1_create_transparent_video(input_file):
214
  logger.info("Stage 1: Creating transparent video")
215
  heartbeat_flag = {"running": True}
216
  threading.Thread(target=heartbeat_monitor, args=(heartbeat_flag,), daemon=True).start()
217
-
218
  try:
219
  # Load models
220
  sam2_predictor = load_sam2_predictor()
221
  matanyone_processor = load_matanyone_processor()
222
  if not sam2_predictor or not matanyone_processor:
223
  raise RuntimeError("Failed to load models")
224
-
225
  # Process video
226
  with tempfile.TemporaryDirectory() as temp_dir:
227
  temp_dir = Path(temp_dir)
228
  input_path = _normalize_input(input_file, temp_dir)
229
-
230
  # Extract audio from input video
231
  audio_path = str(temp_dir / "audio.aac")
232
  extract_audio(input_path, audio_path)
233
-
234
  # Generate first-frame mask
235
  mask = generate_first_frame_mask(input_path, sam2_predictor)
236
  mask_path = str(temp_dir / "mask.png")
237
  cv2.imwrite(mask_path, mask)
238
-
239
  # MatAnyone processing
240
  foreground_path, alpha_path = matanyone_processor.process_video(
241
  input_path=input_path,
@@ -243,22 +238,17 @@ def stage1_create_transparent_video(input_file):
243
  output_path=str(temp_dir),
244
  max_size=720
245
  )
246
-
247
  # Temporal smoothing
248
  smoothed_alpha = smooth_alpha_video(alpha_path, str(temp_dir / "alpha_smoothed.mp4"))
249
-
250
  # Create transparent MOV
251
  transparent_path = create_transparent_mov(foreground_path, smoothed_alpha, temp_dir)
252
  if not transparent_path:
253
  raise RuntimeError("Transparent MOV creation failed")
254
-
255
  # Save to persistent storage
256
  persist_path = Path("tmp") / "transparent_video.mov"
257
  shutil.copyfile(transparent_path, persist_path)
258
-
259
  # Return both transparent video and audio paths for Stage 2
260
  return str(persist_path), audio_path
261
-
262
  except Exception as e:
263
  logger.error(f"Stage 1 failed: {e}", exc_info=True)
264
  st.error(f"Stage 1 Error: {str(e)}")
@@ -278,19 +268,16 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
278
  fps = cap.get(cv2.CAP_PROP_FPS)
279
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
280
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
281
-
282
  # Prepare background
283
  if bg_type == "image":
284
  bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
285
  else: # color
286
  bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
287
  bg_resized = cv2.resize(bg_array, (width, height))
288
-
289
  # Composite frames (no audio yet)
290
  temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
291
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
292
  out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
293
-
294
  while True:
295
  ret, frame = cap.read()
296
  if not ret:
@@ -301,10 +288,8 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
301
  else:
302
  composite = frame # Fallback: no alpha
303
  out.write(composite)
304
-
305
  cap.release()
306
  out.release()
307
-
308
  # Mux audio back into the final video
309
  final_output_path = str(Path("tmp") / "final_output.mp4")
310
  if audio_path and os.path.exists(audio_path):
@@ -317,7 +302,6 @@ def stage2_composite_background(transparent_video_path, audio_path, background,
317
  else:
318
  logger.warning("No audio found, returning video without audio")
319
  return temp_output_path
320
-
321
  except Exception as e:
322
  logger.error(f"Stage 2 failed: {e}", exc_info=True)
323
  st.error(f"Stage 2 Error: {str(e)}")
 
32
  logger = logging.getLogger(__name__)
33
  logging.basicConfig(level=logging.INFO)
34
 
35
+ def check_gpu():
36
+ """Check if GPU is available and log memory usage."""
37
+ if torch.cuda.is_available():
38
+ logger.info(f"CUDA is available. Allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
39
+ return True
40
+ logger.warning("CUDA is NOT available. Falling back to CPU.")
41
+ return False
42
+
43
  # --- T4 GPU Optimizations ---
44
  def setup_t4_environment():
45
  """Configure PyTorch and CUDA for Tesla T4"""
 
76
  def __init__(self):
77
  self.memory_window = 96
78
  self.cleanup_every = 20
 
79
  def adapt(self):
80
  """Adjust parameters based on current VRAM availability"""
81
  if not torch.cuda.is_available():
 
145
  cap.release()
146
  if not ret:
147
  raise ValueError("Failed to read video frame")
 
148
  h, w = frame.shape[:2]
149
  if max(h, w) > 1080:
150
  scale = 1080 / max(h, w)
151
  frame = cv2.resize(frame, (int(w * scale), int(h * scale)))
 
152
  with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
153
  predictor.set_image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
154
  masks, _, _ = predictor.predict(
 
165
  fps = cap.get(cv2.CAP_PROP_FPS)
166
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
167
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
168
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
169
  out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
170
  frame_buffer = deque(maxlen=window_size)
 
171
  while True:
172
  ret, frame = cap.read()
173
  if not ret:
 
177
  frame_buffer.append(frame.astype(np.float32))
178
  smoothed = np.mean(frame_buffer, axis=0).astype(np.uint8)
179
  out.write(smoothed)
 
180
  cap.release()
181
  out.release()
182
  return output_path
 
197
  output_path
198
  ]
199
  subprocess.run(cmd, check=True, capture_output=True)
 
200
  # Verify alpha channel
201
  cap = cv2.VideoCapture(output_path)
202
  ret, frame = cap.read()
 
204
  logger.info(f"FFmpeg MOV: Shape={frame.shape} | Alpha={np.unique(frame[:, :, 3])}")
205
  cap.release()
206
  return output_path
 
207
  except Exception as e:
208
  logger.error(f"FFmpeg MOV creation failed: {e}")
209
  return None
 
214
  logger.info("Stage 1: Creating transparent video")
215
  heartbeat_flag = {"running": True}
216
  threading.Thread(target=heartbeat_monitor, args=(heartbeat_flag,), daemon=True).start()
 
217
  try:
218
  # Load models
219
  sam2_predictor = load_sam2_predictor()
220
  matanyone_processor = load_matanyone_processor()
221
  if not sam2_predictor or not matanyone_processor:
222
  raise RuntimeError("Failed to load models")
 
223
  # Process video
224
  with tempfile.TemporaryDirectory() as temp_dir:
225
  temp_dir = Path(temp_dir)
226
  input_path = _normalize_input(input_file, temp_dir)
 
227
  # Extract audio from input video
228
  audio_path = str(temp_dir / "audio.aac")
229
  extract_audio(input_path, audio_path)
 
230
  # Generate first-frame mask
231
  mask = generate_first_frame_mask(input_path, sam2_predictor)
232
  mask_path = str(temp_dir / "mask.png")
233
  cv2.imwrite(mask_path, mask)
 
234
  # MatAnyone processing
235
  foreground_path, alpha_path = matanyone_processor.process_video(
236
  input_path=input_path,
 
238
  output_path=str(temp_dir),
239
  max_size=720
240
  )
 
241
  # Temporal smoothing
242
  smoothed_alpha = smooth_alpha_video(alpha_path, str(temp_dir / "alpha_smoothed.mp4"))
 
243
  # Create transparent MOV
244
  transparent_path = create_transparent_mov(foreground_path, smoothed_alpha, temp_dir)
245
  if not transparent_path:
246
  raise RuntimeError("Transparent MOV creation failed")
 
247
  # Save to persistent storage
248
  persist_path = Path("tmp") / "transparent_video.mov"
249
  shutil.copyfile(transparent_path, persist_path)
 
250
  # Return both transparent video and audio paths for Stage 2
251
  return str(persist_path), audio_path
 
252
  except Exception as e:
253
  logger.error(f"Stage 1 failed: {e}", exc_info=True)
254
  st.error(f"Stage 1 Error: {str(e)}")
 
268
  fps = cap.get(cv2.CAP_PROP_FPS)
269
  width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
270
  height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
271
  # Prepare background
272
  if bg_type == "image":
273
  bg_array = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
274
  else: # color
275
  bg_array = np.full((height, width, 3), (0, 255, 0), dtype=np.uint8)
276
  bg_resized = cv2.resize(bg_array, (width, height))
 
277
  # Composite frames (no audio yet)
278
  temp_output_path = str(Path("tmp") / "final_video_no_audio.mp4")
279
  fourcc = cv2.VideoWriter_fourcc(*'mp4v')
280
  out = cv2.VideoWriter(temp_output_path, fourcc, fps, (width, height))
 
281
  while True:
282
  ret, frame = cap.read()
283
  if not ret:
 
288
  else:
289
  composite = frame # Fallback: no alpha
290
  out.write(composite)
 
291
  cap.release()
292
  out.release()
 
293
  # Mux audio back into the final video
294
  final_output_path = str(Path("tmp") / "final_output.mp4")
295
  if audio_path and os.path.exists(audio_path):
 
302
  else:
303
  logger.warning("No audio found, returning video without audio")
304
  return temp_output_path
 
305
  except Exception as e:
306
  logger.error(f"Stage 2 failed: {e}", exc_info=True)
307
  st.error(f"Stage 2 Error: {str(e)}")