MogensR committed on
Commit
63853f9
·
1 Parent(s): 7c9eed8
Files changed (5) hide show
  1. app.py +6 -0
  2. models/__init__.py +8 -2
  3. models/matanyone_loader.py +25 -0
  4. models/sam2_loader.py +32 -1
  5. ui.py +113 -18
app.py CHANGED
@@ -37,6 +37,12 @@
37
  print(f"[PATH-CHECK] sys.path[:8] = {sys.path[:8]}")
38
  print(f"[PATH-CHECK] exists(third_party/sam2) = {sam2_path.exists()}")
39
 
 
 
 
 
 
 
40
  # DEBUG: try importing MatAnyone and show its location
41
  try:
42
  import matanyone # noqa: F401
 
37
  print(f"[PATH-CHECK] sys.path[:8] = {sys.path[:8]}")
38
  print(f"[PATH-CHECK] exists(third_party/sam2) = {sam2_path.exists()}")
39
 
40
+ # Force GPU environment variables before any model imports
41
+ os.environ["SAM2_DEVICE"] = "cuda"
42
+ os.environ["MATANY_DEVICE"] = "cuda"
43
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "0" # Allow async CUDA operations
44
+ print(f"[GPU-FORCE] Set SAM2_DEVICE=cuda, MATANY_DEVICE=cuda", flush=True)
45
+
46
  # DEBUG: try importing MatAnyone and show its location
47
  try:
48
  import matanyone # noqa: F401
models/__init__.py CHANGED
@@ -103,9 +103,15 @@ def _pick_device(env_key: str) -> str:
103
  logger.info(f"CUDA environment variables: {cuda_env_vars}")
104
 
105
  logger.info(f"_pick_device({env_key}): requested='{requested}', has_cuda={has_cuda}")
106
- if requested in {"cuda", "cpu"}:
107
- logger.info(f"Using requested device: {requested}")
 
 
 
 
 
108
  return requested
 
109
  result = "cuda" if has_cuda else "cpu"
110
  logger.info(f"Auto-selected device: {result}")
111
  return result
 
103
  logger.info(f"CUDA environment variables: {cuda_env_vars}")
104
 
105
  logger.info(f"_pick_device({env_key}): requested='{requested}', has_cuda={has_cuda}")
106
+
107
+ # Force CUDA if available (empty string counts as no explicit CPU request)
108
+ if has_cuda and requested not in {"cpu"}:
109
+ logger.info(f"FORCING CUDA device (GPU available, requested='{requested}')")
110
+ return "cuda"
111
+ elif requested in {"cuda", "cpu"}:
112
+ logger.info(f"Using explicitly requested device: {requested}")
113
  return requested
114
+
115
  result = "cuda" if has_cuda else "cpu"
116
  logger.info(f"Auto-selected device: {result}")
117
  return result
models/matanyone_loader.py CHANGED
@@ -71,7 +71,24 @@ def load(self) -> bool:
71
  # βœ… top-level wrapper (accepts model/repo id string)
72
  from matanyone import InferenceCore
73
  logger.info("[MatA] init: repo_id=%s device=%s", self.repo_id, self.device)
 
 
 
 
 
 
74
  self.core = InferenceCore(self.repo_id)
 
 
 
 
 
 
 
 
 
 
 
75
  self.loaded = True
76
  logger.info("[MatA] init OK (%.2fs)", time.time() - t0)
77
  return True
@@ -85,8 +102,16 @@ def load(self) -> bool:
85
  def step(self, image: np.ndarray | torch.Tensor, seed_mask: np.ndarray | torch.Tensor) -> np.ndarray:
86
  if not self.loaded or self.core is None:
87
  raise RuntimeError("MatAnyone not loaded")
 
 
 
 
 
88
  img = _to_chw_float01(image).to(self.device, non_blocking=True)
89
  msk = _to_1hw_float01(seed_mask).to(self.device, non_blocking=True)
 
 
 
90
  out = self.core.step(img, msk)
91
  alpha = out[0] if isinstance(out, (tuple, list)) else out
92
  if not isinstance(alpha, torch.Tensor):
 
71
  # βœ… top-level wrapper (accepts model/repo id string)
72
  from matanyone import InferenceCore
73
  logger.info("[MatA] init: repo_id=%s device=%s", self.repo_id, self.device)
74
+
75
+ # Force GPU device if CUDA available
76
+ if torch.cuda.is_available() and self.device != "cpu":
77
+ self.device = "cuda"
78
+ logger.info("[MatA] FORCING CUDA device for GPU acceleration")
79
+
80
  self.core = InferenceCore(self.repo_id)
81
+
82
+ # Verify MatAnyone is using GPU if available
83
+ if hasattr(self.core, 'device'):
84
+ actual_device = getattr(self.core, 'device', 'unknown')
85
+ logger.info(f"[MatA] device verification: expected={self.device}, actual={actual_device}")
86
+
87
+ # Try to move core to device if it has a 'to' method
88
+ if hasattr(self.core, 'to'):
89
+ self.core = self.core.to(self.device)
90
+ logger.info(f"[MatA] moved core to device: {self.device}")
91
+
92
  self.loaded = True
93
  logger.info("[MatA] init OK (%.2fs)", time.time() - t0)
94
  return True
 
102
  def step(self, image: np.ndarray | torch.Tensor, seed_mask: np.ndarray | torch.Tensor) -> np.ndarray:
103
  if not self.loaded or self.core is None:
104
  raise RuntimeError("MatAnyone not loaded")
105
+
106
+ # Force GPU device for tensors
107
+ if torch.cuda.is_available():
108
+ self.device = "cuda"
109
+
110
  img = _to_chw_float01(image).to(self.device, non_blocking=True)
111
  msk = _to_1hw_float01(seed_mask).to(self.device, non_blocking=True)
112
+
113
+ # Verify tensors are on GPU
114
+ logger.info(f"[MatA] step: img device={img.device}, mask device={msk.device}, target device={self.device}")
115
  out = self.core.step(img, msk)
116
  alpha = out[0] if isinstance(out, (tuple, list)) else out
117
  if not isinstance(alpha, torch.Tensor):
models/sam2_loader.py CHANGED
@@ -56,7 +56,21 @@ def load(self, variant: str = DEFAULT_VARIANT, model_id: str = DEFAULT_MODEL_ID)
56
  log.info("Calling build_sam2()...")
57
  model = build_sam2(config_file=full_cfg_path, ckpt_path=ckpt, device=str(self.device))
58
  log.info("build_sam2() completed successfully")
 
 
 
59
  model.eval()
 
 
 
 
 
 
 
 
 
 
 
60
  self.model = model
61
 
62
  try:
@@ -75,13 +89,30 @@ def first_frame_mask(self, image_rgb01):
75
  Returns an initial binary-ish mask for the foreground subject from first frame.
76
  You can refine prompts here (points/boxes) if you add UI hooks later.
77
  """
 
 
 
 
78
  if hasattr(self.predictor, "set_image"):
79
- self.predictor.set_image((image_rgb01*255).astype("uint8"))
 
 
 
 
 
 
80
  # simple auto-box prompt (tight box)
81
  h, w = image_rgb01.shape[:2]
82
  box = np.array([1, 1, w-2, h-2])
83
  masks, _, _ = self.predictor.predict(box=box, multimask_output=False)
84
  mask = masks[0] # HxW bool/float
 
 
 
 
 
 
 
85
  else:
86
  # video predictor path: run_single_frame if available
87
  mask = (image_rgb01[...,0] > -1) # dummy, should not happen
 
56
  log.info("Calling build_sam2()...")
57
  model = build_sam2(config_file=full_cfg_path, ckpt_path=ckpt, device=str(self.device))
58
  log.info("build_sam2() completed successfully")
59
+
60
+ # Explicitly move model to device and verify
61
+ model = model.to(self.device)
62
  model.eval()
63
+
64
+ # Verify model is on correct device
65
+ if hasattr(model, 'parameters'):
66
+ first_param = next(model.parameters(), None)
67
+ if first_param is not None:
68
+ actual_device = first_param.device
69
+ log.info(f"SAM2 model device verification: expected={self.device}, actual={actual_device}")
70
+ if str(actual_device) != str(self.device):
71
+ log.warning(f"SAM2 model device mismatch! Moving to {self.device}")
72
+ model = model.to(self.device)
73
+
74
  self.model = model
75
 
76
  try:
 
89
  Returns an initial binary-ish mask for the foreground subject from first frame.
90
  You can refine prompts here (points/boxes) if you add UI hooks later.
91
  """
92
+ # Ensure input tensor is on correct device
93
+ if isinstance(image_rgb01, torch.Tensor):
94
+ image_rgb01 = image_rgb01.to(self.device, non_blocking=True)
95
+
96
  if hasattr(self.predictor, "set_image"):
97
+ # Convert to numpy for predictor if needed
98
+ if isinstance(image_rgb01, torch.Tensor):
99
+ image_np = (image_rgb01.cpu().numpy() * 255).astype("uint8")
100
+ else:
101
+ image_np = (image_rgb01 * 255).astype("uint8")
102
+
103
+ self.predictor.set_image(image_np)
104
  # simple auto-box prompt (tight box)
105
  h, w = image_rgb01.shape[:2]
106
  box = np.array([1, 1, w-2, h-2])
107
  masks, _, _ = self.predictor.predict(box=box, multimask_output=False)
108
  mask = masks[0] # HxW bool/float
109
+
110
+ # Keep model on GPU - verify device after prediction
111
+ if hasattr(self.model, 'parameters'):
112
+ first_param = next(self.model.parameters(), None)
113
+ if first_param is not None and str(first_param.device) != str(self.device):
114
+ log.warning(f"SAM2 model moved off GPU during prediction! Moving back to {self.device}")
115
+ self.model = self.model.to(self.device)
116
  else:
117
  # video predictor path: run_single_frame if available
118
  mask = (image_rgb01[...,0] > -1) # dummy, should not happen
ui.py CHANGED
@@ -224,19 +224,39 @@ def process_video_with_background_stoppable(
224
  bg_img.save(tmp_bg.name, format="PNG")
225
  bg_path = tmp_bg.name
226
 
227
- # Run pipeline with immediate diagnostic logging
228
- yield gr.update(visible=False), gr.update(visible=True), None, "πŸ”„ Initializing pipeline..."
229
  logger.info(f"=== PIPELINE START ===")
230
 
231
- # GPU diagnostics first
232
  try:
233
  import torch
234
  logger.info(f"βœ… Torch version: {torch.__version__}")
235
  logger.info(f"βœ… CUDA available: {torch.cuda.is_available()}")
 
236
  if torch.cuda.is_available():
237
- logger.info(f"βœ… CUDA device count: {torch.cuda.device_count()}")
238
- logger.info(f"βœ… Current device: {torch.cuda.current_device()}")
239
- logger.info(f"βœ… Device name: {torch.cuda.get_device_name()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  else:
241
  logger.error(f"❌ CUDA NOT AVAILABLE - GPU processing will fail")
242
  yield gr.update(visible=True), gr.update(visible=False), None, "❌ GPU not available - processing will fail"
@@ -246,12 +266,15 @@ def process_video_with_background_stoppable(
246
  yield gr.update(visible=True), gr.update(visible=False), None, f"GPU check error: {e}"
247
  return
248
 
 
249
  logger.info(f"About to import pipeline module...")
250
 
251
  try:
252
  pipe = importlib.import_module("pipeline")
253
  logger.info(f"βœ… Pipeline module imported successfully")
254
- yield gr.update(visible=False), gr.update(visible=True), None, "πŸ“Ή Starting video processing..."
 
 
255
  except Exception as e:
256
  logger.error(f"❌ Pipeline import failed: {e}")
257
  yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline import error: {e}"
@@ -260,14 +283,44 @@ def process_video_with_background_stoppable(
260
  logger.info(f"Calling pipe.process with video_path={video_path}, bg_path={bg_path}")
261
  logger.info(f"=== CALLING PIPELINE.PROCESS ===")
262
 
 
 
 
 
263
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  out_path, diag = pipe.process(
265
  video_path=video_path,
266
  bg_image_path=bg_path,
267
  point_x=None,
268
  point_y=None,
269
  auto_box=True,
270
- work_dir=None
 
271
  )
272
  logger.info(f"=== PIPELINE.PROCESS RETURNED ===")
273
  logger.info(f"Pipeline completed: out_path={out_path}, diag={diag}")
@@ -275,25 +328,67 @@ def process_video_with_background_stoppable(
275
  logger.error(f"❌ Pipeline.process failed: {e}")
276
  import traceback
277
  logger.error(f"Full traceback: {traceback.format_exc()}")
278
- yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline processing error: {e}"
 
 
279
  return
280
 
281
  if out_path:
282
- # Show final processing stats in success message
283
  fps = diag.get('fps', 'unknown')
284
  resolution = diag.get('resolution', 'unknown')
285
  sam2_ok = diag.get('sam2_ok', False)
286
  matany_ok = diag.get('matany_ok', False)
287
  processing_time = diag.get('total_time_sec', 0)
 
 
288
 
289
- status_msg = f"βœ… Processing complete! "
290
- if fps != 'unknown' and resolution != 'unknown':
291
- status_msg += f"Video: {resolution} @ {fps}fps, "
292
- status_msg += f"SAM2: {'βœ“' if sam2_ok else 'βœ—'}, MatAnyone: {'βœ“' if matany_ok else 'βœ—'}"
293
- if processing_time > 0:
294
- status_msg += f", Time: {processing_time:.1f}s"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  else:
296
- status_msg = f"❌ Processing failed: {diag.get('error','unknown error')}"
 
 
 
 
 
297
 
298
  if STOP.stop:
299
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
@@ -376,7 +471,7 @@ def create_interface():
376
  with gr.Column(scale=1):
377
  gr.Markdown("## Results")
378
  result_video = gr.Video(label="Processed Video", height=400)
379
- status_output = gr.Textbox(label="Processing Status", lines=5, max_lines=10, elem_classes=["status-box"])
380
  gr.Markdown("""
381
  ### Pipeline
382
  1. SAM2 Segmentation → mask
 
224
  bg_img.save(tmp_bg.name, format="PNG")
225
  bg_path = tmp_bg.name
226
 
227
+ # Run pipeline with enhanced real-time status updates
228
+ yield gr.update(visible=False), gr.update(visible=True), None, "πŸ”„ Initializing pipeline...\n⚑ Checking GPU acceleration..."
229
  logger.info(f"=== PIPELINE START ===")
230
 
231
+ # Enhanced GPU diagnostics with detailed status
232
  try:
233
  import torch
234
  logger.info(f"βœ… Torch version: {torch.__version__}")
235
  logger.info(f"βœ… CUDA available: {torch.cuda.is_available()}")
236
+
237
  if torch.cuda.is_available():
238
+ device_count = torch.cuda.device_count()
239
+ current_device = torch.cuda.current_device()
240
+ device_name = torch.cuda.get_device_name()
241
+ device_capability = torch.cuda.get_device_capability()
242
+
243
+ # Get GPU memory info
244
+ memory_allocated = torch.cuda.memory_allocated() / (1024**3) # GB
245
+ memory_reserved = torch.cuda.memory_reserved() / (1024**3) # GB
246
+ memory_total = torch.cuda.get_device_properties(current_device).total_memory / (1024**3) # GB
247
+
248
+ gpu_status = f"""βœ… GPU Acceleration Active
249
+ πŸ–₯️ Device: {device_name} (Compute {device_capability[0]}.{device_capability[1]})
250
+ πŸ’Ύ Memory: {memory_allocated:.1f}GB allocated / {memory_total:.1f}GB total
251
+ πŸ”§ CUDA {torch.version.cuda} | PyTorch {torch.__version__}
252
+ πŸ“Š Ready for SAM2 + MatAnyone processing..."""
253
+
254
+ logger.info(f"βœ… CUDA device count: {device_count}")
255
+ logger.info(f"βœ… Current device: {current_device}")
256
+ logger.info(f"βœ… Device name: {device_name}")
257
+ logger.info(f"βœ… GPU memory: {memory_allocated:.1f}GB/{memory_total:.1f}GB")
258
+
259
+ yield gr.update(visible=False), gr.update(visible=True), None, gpu_status
260
  else:
261
  logger.error(f"❌ CUDA NOT AVAILABLE - GPU processing will fail")
262
  yield gr.update(visible=True), gr.update(visible=False), None, "❌ GPU not available - processing will fail"
 
266
  yield gr.update(visible=True), gr.update(visible=False), None, f"GPU check error: {e}"
267
  return
268
 
269
+ yield gr.update(visible=False), gr.update(visible=True), None, gpu_status + "\n\nπŸ”„ Loading pipeline modules..."
270
  logger.info(f"About to import pipeline module...")
271
 
272
  try:
273
  pipe = importlib.import_module("pipeline")
274
  logger.info(f"βœ… Pipeline module imported successfully")
275
+
276
+ pipeline_status = gpu_status + "\n\nβœ… Pipeline modules loaded\nπŸ“Ή Initializing video processing pipeline..."
277
+ yield gr.update(visible=False), gr.update(visible=True), None, pipeline_status
278
  except Exception as e:
279
  logger.error(f"❌ Pipeline import failed: {e}")
280
  yield gr.update(visible=True), gr.update(visible=False), None, f"Pipeline import error: {e}"
 
283
  logger.info(f"Calling pipe.process with video_path={video_path}, bg_path={bg_path}")
284
  logger.info(f"=== CALLING PIPELINE.PROCESS ===")
285
 
286
+ # Enhanced status during processing
287
+ processing_status = gpu_status + "\n\nπŸš€ PROCESSING STARTED\n⏱️ Stage 1: Video analysis & SAM2 segmentation..."
288
+ yield gr.update(visible=False), gr.update(visible=True), None, processing_status
289
+
290
  try:
291
+ # Create a progress callback for real-time updates
292
+ def progress_callback(stage, message, progress_pct=None):
293
+ nonlocal processing_status
294
+ import time
295
+ timestamp = time.strftime("%H:%M:%S")
296
+
297
+ if stage == "sam2_loading":
298
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\n⏱️ [{timestamp}] Stage 1: Loading SAM2 model..."
299
+ elif stage == "sam2_processing":
300
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\n⏱️ [{timestamp}] Stage 1: SAM2 segmentation in progress..."
301
+ elif stage == "matanyone_loading":
302
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\nβœ… Stage 1: SAM2 complete\n⏱️ [{timestamp}] Stage 2: Loading MatAnyone model..."
303
+ elif stage == "matanyone_processing":
304
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\nβœ… Stage 1: SAM2 complete\n⏱️ [{timestamp}] Stage 2: MatAnyone video matting..."
305
+ elif stage == "compositing":
306
+ processing_status = gpu_status + f"\n\nπŸš€ PROCESSING STARTED\nβœ… Stage 1: SAM2 complete\nβœ… Stage 2: MatAnyone complete\n⏱️ [{timestamp}] Stage 3: Final compositing..."
307
+
308
+ if progress_pct:
309
+ processing_status += f" ({progress_pct}%)"
310
+ if message:
311
+ processing_status += f"\nπŸ’¬ {message}"
312
+
313
+ # Note: We can't yield from callback, but we log for debugging
314
+ logger.info(f"Progress: {stage} - {message}")
315
+
316
  out_path, diag = pipe.process(
317
  video_path=video_path,
318
  bg_image_path=bg_path,
319
  point_x=None,
320
  point_y=None,
321
  auto_box=True,
322
+ work_dir=None,
323
+ progress_callback=progress_callback # Pass callback if supported
324
  )
325
  logger.info(f"=== PIPELINE.PROCESS RETURNED ===")
326
  logger.info(f"Pipeline completed: out_path={out_path}, diag={diag}")
 
328
  logger.error(f"❌ Pipeline.process failed: {e}")
329
  import traceback
330
  logger.error(f"Full traceback: {traceback.format_exc()}")
331
+
332
+ error_status = gpu_status + f"\n\n❌ PROCESSING FAILED\n🚨 Error: {str(e)[:200]}..."
333
+ yield gr.update(visible=True), gr.update(visible=False), None, error_status
334
  return
335
 
336
  if out_path:
337
+ # Enhanced final processing stats with detailed breakdown
338
  fps = diag.get('fps', 'unknown')
339
  resolution = diag.get('resolution', 'unknown')
340
  sam2_ok = diag.get('sam2_ok', False)
341
  matany_ok = diag.get('matany_ok', False)
342
  processing_time = diag.get('total_time_sec', 0)
343
+ sam2_time = diag.get('sam2_time_sec', 0)
344
+ matany_time = diag.get('matany_time_sec', 0)
345
 
346
+ # Get final GPU memory usage and verify GPU acceleration was used
347
+ try:
348
+ import torch
349
+ if torch.cuda.is_available():
350
+ final_memory = torch.cuda.memory_allocated() / (1024**3)
351
+ peak_memory = torch.cuda.max_memory_allocated() / (1024**3)
352
+
353
+ # Log GPU utilization to verify models used GPU
354
+ logger.info(f"GPU USAGE VERIFICATION:")
355
+ logger.info(f" Final memory allocated: {final_memory:.2f}GB")
356
+ logger.info(f" Peak memory used: {peak_memory:.2f}GB")
357
+
358
+ if peak_memory < 0.1: # Less than 100MB indicates CPU usage
359
+ logger.warning(f"⚠️ LOW GPU USAGE! Peak memory {peak_memory:.2f}GB suggests CPU fallback")
360
+ else:
361
+ logger.info(f"βœ… GPU ACCELERATION CONFIRMED - Peak usage {peak_memory:.2f}GB")
362
+
363
+ torch.cuda.reset_peak_memory_stats() # Reset for next run
364
+ else:
365
+ final_memory = peak_memory = 0
366
+ logger.warning("❌ CUDA not available - models used CPU")
367
+ except Exception as e:
368
+ logger.error(f"GPU memory check failed: {e}")
369
+ final_memory = peak_memory = 0
370
+
371
+ status_msg = gpu_status + f"""
372
+
373
+ πŸŽ‰ PROCESSING COMPLETE!
374
+ βœ… Stage 1: SAM2 segmentation {'βœ“' if sam2_ok else 'βœ—'} ({sam2_time:.1f}s)
375
+ βœ… Stage 2: MatAnyone matting {'βœ“' if matany_ok else 'βœ—'} ({matany_time:.1f}s)
376
+ βœ… Stage 3: Final compositing complete
377
+
378
+ πŸ“Š RESULTS:
379
+ 🎬 Video: {resolution} @ {fps}fps
380
+ ⏱️ Total time: {processing_time:.1f}s
381
+ πŸ’Ύ Peak GPU memory: {peak_memory:.1f}GB
382
+ πŸš€ Pipeline: SAM2 + MatAnyone + GPU acceleration
383
+
384
+ Ready for download! 🎯"""
385
  else:
386
+ error_details = diag.get('error', 'unknown error')
387
+ status_msg = gpu_status + f"""
388
+
389
+ ❌ PROCESSING FAILED
390
+ 🚨 Error: {error_details}
391
+ πŸ’‘ Check logs for detailed troubleshooting info"""
392
 
393
  if STOP.stop:
394
  yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
 
471
  with gr.Column(scale=1):
472
  gr.Markdown("## Results")
473
  result_video = gr.Video(label="Processed Video", height=400)
474
+ status_output = gr.Textbox(label="Processing Status", lines=8, max_lines=15, elem_classes=["status-box"])
475
  gr.Markdown("""
476
  ### Pipeline
477
  1. SAM2 Segmentation β†’ mask