MogensR committed on
Commit
bd64f57
·
1 Parent(s): 80ac736

agent 1.5

Browse files
Files changed (1) hide show
  1. models/matanyone_loader.py +278 -57
models/matanyone_loader.py CHANGED
@@ -105,17 +105,56 @@ class MatAnyoneSession:
105
  """
106
 
107
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
 
 
 
 
 
 
108
  self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
109
- self.precision = precision
110
  self._core = None
111
  self._api_mode = None # "step", "process_frame", or "process_video"
 
 
 
 
112
  self._lazy_init()
113
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  def _lazy_init(self) -> None:
 
115
  try:
116
  from matanyone.inference.inference_core import InferenceCore # type: ignore
 
 
117
  except Exception as e:
118
- raise MatAnyError(f"MatAnyone import failed: {e}")
 
 
 
 
 
 
 
 
119
 
120
  # Try zero-arg first, then repo-id variant
121
  try:
@@ -203,16 +242,38 @@ def _run_frame(self, frame_bgr: np.ndarray, seed_1hw: Optional[np.ndarray], is_f
203
  raise MatAnyError(f"mask_t must be HW; got {tuple(mask_t.shape)}")
204
 
205
  # --- Process with MatAnyone ---
206
- with torch.no_grad(), self._maybe_amp():
207
- try:
 
208
  if self._api_mode == "step":
209
  alpha = self._core.step(img_t, mask_t) if mask_t is not None else self._core.step(img_t)
210
  elif self._api_mode == "process_frame":
211
  alpha = self._core.process_frame(img_t, mask_t)
212
  else:
213
- raise MatAnyError("Internal: _run_frame used while API mode is 'process_video'.")
214
- except Exception as e:
215
- raise MatAnyError(f"MatAnyone processing failed: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
  # --- Process output ---
218
  # Convert to numpy and ensure correct shape/range
@@ -306,18 +367,42 @@ def process_stream(
306
  """Process video stream with MatAnyone.
307
 
308
  Args:
309
- video_path: Input video file
310
  seed_mask_path: Optional seed mask image (grayscale, same size as video)
311
  out_dir: Output directory (default: video_path.parent)
312
  progress_cb: Callback for progress updates (signature: (float, str) or (str,))
313
 
314
  Returns:
315
  Tuple of (alpha_path, fg_path) output video paths
 
 
 
 
 
316
  """
 
 
 
 
 
 
 
317
  if out_dir is None:
318
  out_dir = video_path.parent
 
319
  out_dir = Path(out_dir)
320
  out_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  cap = cv2.VideoCapture(str(video_path))
323
  if not cap.isOpened():
@@ -330,32 +415,64 @@ def process_stream(
330
  cap.release()
331
 
332
  log.info(f"[MATANY] Processing {N} frames ({W}x{H} @ {fps:.1f}fps) from {video_path}")
 
333
 
334
  if self._api_mode == "process_video":
335
  # --- PATH-BASED CALL (this wheel expects a video path, not tensors) ---
336
- _emit_progress(progress_cb, 0.05, "MatAnyone (video mode)")
337
-
338
- # Some builds accept (video_path, seed_mask_path), others just (video_path)
339
- try:
340
- res = self._core.process_video(str(video_path),
341
- str(seed_mask_path) if seed_mask_path is not None else None)
342
- except TypeError:
343
- # Fallback: only video path
344
- res = self._core.process_video(str(video_path))
345
-
346
- # Normalize whatever we got back into alpha.mp4 + fg.mp4 in out_dir
347
- alpha_path, fg_path = self._harvest_process_video_output(res, out_dir, base=video_path.stem)
348
- _validate_nonempty(alpha_path)
349
- _validate_nonempty(fg_path)
350
- _emit_progress(progress_cb, 1.0, "MatAnyone complete")
351
- return alpha_path, fg_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  else:
353
  # Frame-by-frame (preferred)
354
  log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")
 
 
355
  cap = cv2.VideoCapture(str(video_path))
356
  alpha_path = out_dir / "alpha.mp4"
357
  fg_path = out_dir / "fg.mp4"
358
 
 
 
359
  alpha_writer = cv2.VideoWriter(
360
  str(alpha_path),
361
  cv2.VideoWriter_fourcc(*'mp4v'),
@@ -370,6 +487,9 @@ def process_stream(
370
  (W, H),
371
  isColor=True
372
  )
 
 
 
373
 
374
  try:
375
  # Load seed mask if provided
@@ -378,40 +498,141 @@ def process_stream(
378
  seed_1hw = _read_mask_hw(seed_mask_path, (H, W))
379
 
380
  idx = 0
381
- while True:
382
- ret, frame = cap.read()
383
- if not ret:
384
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
- if idx % 10 == 0:
387
- _emit_progress(progress_cb, min(0.999, (idx / N) if N > 0 else 0.0),
388
- f"MatAnyone matting… ({idx}/{N})")
 
 
 
389
 
390
- log.debug(f"[MATANY] Processing frame {idx+1}/{N}")
391
- # Only pass seed mask on first frame
392
- current_mask = seed_1hw if idx == 0 else None
393
- alpha_hw = self._run_frame(frame, current_mask, is_first=(idx == 0))
394
-
395
- # compose fg for immediate write
396
- # alpha 0..1 -> 0..255 3-channel grayscale
397
- alpha_u8 = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
398
- alpha_rgb = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
399
- # Blend: fg = alpha*frame + (1-alpha)*black == alpha*frame
400
- fg_bgr = (frame.astype(np.float32) * (alpha_hw[..., None] / 255.0)).astype(np.uint8)
401
-
402
- # Write outputs
403
- alpha_writer.write(alpha_rgb)
404
- fg_writer.write(fg_bgr)
405
- idx += 1
406
-
407
- finally:
408
- cap.release()
409
- alpha_writer.release()
410
- fg_writer.release()
411
- _validate_nonempty(alpha_path)
412
- _validate_nonempty(fg_path)
413
- _emit_progress(progress_cb, 1.0, "MatAnyone complete")
414
- return alpha_path, fg_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
 
416
  def _flush_chunk(self, frames_bgr, seed_1hw, alpha_writer, fg_writer):
417
  """Process a chunk of frames with MatAnyone."""
 
105
  """
106
 
107
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
108
+ """Initialize MatAnyoneSession with optional device and precision settings.
109
+
110
+ Args:
111
+ device: Device to run on (e.g., 'cuda', 'cpu', 'cuda:0'). If None, auto-detects CUDA.
112
+ precision: One of 'auto', 'fp32', or 'fp16'. 'auto' uses fp16 if CUDA is available.
113
+ """
114
  self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
115
+ self.precision = precision.lower()
116
  self._core = None
117
  self._api_mode = None # "step", "process_frame", or "process_video"
118
+ self._frame_times = []
119
+ self._start_time = 0.0
120
+ self._gpu_mem_allocated = 0.0
121
+ self._gpu_mem_cached = 0.0
122
  self._lazy_init()
123
+
124
+ # Log initialization
125
+ log.info(f"Initialized MatAnyoneSession on {self.device} with precision {self.precision}")
126
+ if torch.cuda.is_available():
127
+ log.info(f"CUDA device: {torch.cuda.get_device_name(self.device)}")
128
+ self._log_gpu_memory()
129
+
130
+ def _log_gpu_memory(self) -> None:
131
+ """Log current GPU memory usage."""
132
+ if torch.cuda.is_available():
133
+ try:
134
+ allocated = torch.cuda.memory_allocated(self.device) / 1024**2
135
+ cached = torch.cuda.memory_reserved(self.device) / 1024**2
136
+ log.info(f"GPU Memory - Allocated: {allocated:.1f}MB, Cached: {cached:.1f}MB")
137
+ return allocated, cached
138
+ except Exception as e:
139
+ log.warning(f"Failed to get GPU memory info: {e}")
140
+ return 0.0, 0.0
141
+
142
  def _lazy_init(self) -> None:
143
+ """Lazy initialization of the MatAnyone inference core."""
144
  try:
145
  from matanyone.inference.inference_core import InferenceCore # type: ignore
146
+ except ImportError as e:
147
+ raise MatAnyError(f"Failed to import MatAnyone: {e}. Please ensure it's installed correctly.")
148
  except Exception as e:
149
+ raise MatAnyError(f"Unexpected error during MatAnyone import: {e}")
150
+
151
+ # Log GPU info
152
+ if torch.cuda.is_available():
153
+ log.info(f"[GPU] CUDA is available. Device: {torch.cuda.get_device_name(0)}")
154
+ log.info(f"[GPU] Memory allocated: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
155
+ log.info(f"[GPU] Memory cached: {torch.cuda.memory_reserved()/1024**2:.1f}MB")
156
+ else:
157
+ log.warning("[GPU] CUDA is not available. Using CPU (this will be slow!)")
158
 
159
  # Try zero-arg first, then repo-id variant
160
  try:
 
242
  raise MatAnyError(f"mask_t must be HW; got {tuple(mask_t.shape)}")
243
 
244
  # --- Process with MatAnyone ---
245
+ frame_start_time = time.time()
246
+ try:
247
+ with torch.no_grad(), self._maybe_amp():
248
  if self._api_mode == "step":
249
  alpha = self._core.step(img_t, mask_t) if mask_t is not None else self._core.step(img_t)
250
  elif self._api_mode == "process_frame":
251
  alpha = self._core.process_frame(img_t, mask_t)
252
  else:
253
+ raise MatAnyError("Internal error: Invalid API mode")
254
+
255
+ # Log performance metrics
256
+ frame_time = time.time() - frame_start_time
257
+ self._frame_times.append(frame_time)
258
+ if len(self._frame_times) > 10: # Keep last 10 frame times
259
+ self._frame_times.pop(0)
260
+
261
+ # Log GPU memory every 10 frames
262
+ if len(self._frame_times) % 10 == 0:
263
+ self._log_gpu_memory()
264
+
265
+ return alpha
266
+
267
+ except torch.cuda.OutOfMemoryError:
268
+ self._log_gpu_memory()
269
+ raise MatAnyError("CUDA out of memory. Try reducing the input resolution or batch size.")
270
+ except RuntimeError as e:
271
+ if "CUDA" in str(e):
272
+ self._log_gpu_memory()
273
+ raise MatAnyError(f"CUDA error: {e}")
274
+ raise MatAnyError(f"Runtime error: {e}")
275
+ except Exception as e:
276
+ raise MatAnyError(f"Processing failed: {e}")
277
 
278
  # --- Process output ---
279
  # Convert to numpy and ensure correct shape/range
 
367
  """Process video stream with MatAnyone.
368
 
369
  Args:
370
+ video_path: Input video file path (must exist and be readable)
371
  seed_mask_path: Optional seed mask image (grayscale, same size as video)
372
  out_dir: Output directory (default: video_path.parent)
373
  progress_cb: Callback for progress updates (signature: (float, str) or (str,))
374
 
375
  Returns:
376
  Tuple of (alpha_path, fg_path) output video paths
377
+
378
+ Raises:
379
+ MatAnyError: If processing fails for any reason
380
+ FileNotFoundError: If input files are not found
381
+ ValueError: If input parameters are invalid
382
  """
383
+ # Input validation
384
+ if not video_path.exists():
385
+ raise FileNotFoundError(f"Input video not found: {video_path}")
386
+
387
+ if seed_mask_path is not None and not seed_mask_path.exists():
388
+ raise FileNotFoundError(f"Seed mask not found: {seed_mask_path}")
389
+
390
  if out_dir is None:
391
  out_dir = video_path.parent
392
+
393
  out_dir = Path(out_dir)
394
  out_dir.mkdir(parents=True, exist_ok=True)
395
+
396
+ # Initialize progress tracking
397
+ self._frame_times = []
398
+ self._start_time = time.time()
399
+ _emit_progress(progress_cb, 0.0, "Initializing video processing...")
400
+
401
+ # Log GPU status
402
+ if torch.cuda.is_available():
403
+ _emit_progress(progress_cb, 0.01, "GPU detected, initializing CUDA...")
404
+ else:
405
+ _emit_progress(progress_cb, 0.01, "No GPU detected, using CPU (slower)...")
406
 
407
  cap = cv2.VideoCapture(str(video_path))
408
  if not cap.isOpened():
 
415
  cap.release()
416
 
417
  log.info(f"[MATANY] Processing {N} frames ({W}x{H} @ {fps:.1f}fps) from {video_path}")
418
+ _emit_progress(progress_cb, 0.05, f"Processing {N} frames ({W}x{H} @ {fps:.1f}fps)")
419
 
420
  if self._api_mode == "process_video":
421
  # --- PATH-BASED CALL (this wheel expects a video path, not tensors) ---
422
+ _emit_progress(progress_cb, 0.1, "Using MatAnyone video mode (GPU-accelerated)")
423
+
424
+ # Log before starting video processing
425
+ if torch.cuda.is_available():
426
+ mem_alloc, _ = self._log_gpu_memory()
427
+ _emit_progress(progress_cb, 0.12, f"GPU memory before processing: {mem_alloc:.1f}MB")
428
+
429
+ # Some builds accept (video_path, seed_mask_path), others just (video_path)
430
+ try:
431
+ _emit_progress(progress_cb, 0.15, "Starting video processing with mask...")
432
+ res = self._core.process_video(
433
+ str(video_path),
434
+ str(seed_mask_path) if seed_mask_path is not None else None
435
+ )
436
+ except TypeError as e:
437
+ if "takes 2 positional arguments but 3 were given" in str(e):
438
+ _emit_progress(progress_cb, 0.15, "Starting video processing without mask...")
439
+ res = self._core.process_video(str(video_path))
440
+ else:
441
+ raise
442
+
443
+ # Log after processing
444
+ if torch.cuda.is_available():
445
+ _emit_progress(progress_cb, 0.9, f"Processing complete. GPU memory used: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
446
+ else:
447
+ _emit_progress(progress_cb, 0.9, "Processing complete.")
448
+
449
+ # Normalize output files
450
+ _emit_progress(progress_cb, 0.95, "Finalizing output files...")
451
+ alpha_path, fg_path = self._harvest_process_video_output(res, out_dir, base=video_path.stem)
452
+ _validate_nonempty(alpha_path)
453
+ _validate_nonempty(fg_path)
454
+
455
+ _emit_progress(progress_cb, 1.0, "Processing complete!")
456
+ return alpha_path, fg_path
457
+
458
+ except Exception as e:
459
+ error_msg = f"Error during video processing: {str(e)}"
460
+ log.error(error_msg, exc_info=True)
461
+ if torch.cuda.is_available():
462
+ error_msg += f"\nGPU Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB allocated"
463
+ _emit_progress(progress_cb, -1, error_msg)
464
+ raise MatAnyError(error_msg) from e
465
  else:
466
  # Frame-by-frame (preferred)
467
  log.info(f"[MATANY] Using frame-by-frame mode: {self._api_mode}")
468
+ _emit_progress(progress_cb, 0.1, f"Using {self._api_mode} mode (frame-by-frame)")
469
+
470
  cap = cv2.VideoCapture(str(video_path))
471
  alpha_path = out_dir / "alpha.mp4"
472
  fg_path = out_dir / "fg.mp4"
473
 
474
+ # Initialize video writers
475
+ _emit_progress(progress_cb, 0.12, "Initializing video writers...")
476
  alpha_writer = cv2.VideoWriter(
477
  str(alpha_path),
478
  cv2.VideoWriter_fourcc(*'mp4v'),
 
487
  (W, H),
488
  isColor=True
489
  )
490
+
491
+ if not alpha_writer.isOpened() or not fg_writer.isOpened():
492
+ raise MatAnyError("Failed to initialize video writers")
493
 
494
  try:
495
  # Load seed mask if provided
 
498
  seed_1hw = _read_mask_hw(seed_mask_path, (H, W))
499
 
500
  idx = 0
501
+ last_progress_update = 0
502
+ frame_times = []
503
+ start_time = time.time()
504
+
505
+ try:
506
+ while True:
507
+ ret, frame = cap.read()
508
+ if not ret:
509
+ break
510
+
511
+ frame_start_time = time.time()
512
+
513
+ # Update progress more frequently (every 1% or 5 frames, whichever is more frequent)
514
+ current_progress = (idx / N) if N > 0 else 0.0
515
+ if idx % max(5, N//100) == 0 or time.time() - last_progress_update > 2.0:
516
+ # Calculate progress metrics
517
+ elapsed = time.time() - start_time
518
+ if idx > 0 and current_progress > 0:
519
+ # Calculate ETA
520
+ eta_seconds = (elapsed / current_progress) * (1 - current_progress)
521
+ if eta_seconds > 3600:
522
+ eta_str = f"{eta_seconds/3600:.1f} hours"
523
+ elif eta_seconds > 60:
524
+ eta_str = f"{eta_seconds/60:.1f} minutes"
525
+ else:
526
+ eta_str = f"{eta_seconds:.0f} seconds"
527
+
528
+ # Calculate processing speed
529
+ fps = idx / elapsed if elapsed > 0 else 0
530
+
531
+ # Add GPU memory info if available
532
+ gpu_info = ""
533
+ if torch.cuda.is_available():
534
+ mem_alloc = torch.cuda.memory_allocated() / 1024**2
535
+ mem_cached = torch.cuda.memory_reserved() / 1024**2
536
+ gpu_info = f" | GPU: {mem_alloc:.1f}/{mem_cached:.1f}MB"
537
+
538
+ status = (f"Processing frame {idx+1}/{N} (ETA: {eta_str}, "
539
+ f"{fps:.1f} FPS{gpu_info}")
540
+ _emit_progress(progress_cb, min(0.99, current_progress), status)
541
+ last_progress_update = time.time()
542
+
543
+ # Process frame
544
+ log.debug(f"[MATANY] Processing frame {idx+1}/{N}")
545
+ # Only pass seed mask on first frame
546
+ current_mask = seed_1hw if idx == 0 else None
547
+ alpha_hw = self._run_frame(frame, current_mask, is_first=(idx == 0))
548
+
549
+ # Calculate frame processing time
550
+ frame_time = time.time() - frame_start_time
551
+ frame_times.append(frame_time)
552
+ if len(frame_times) > 10: # Keep last 10 frame times for average
553
+ frame_times.pop(0)
554
+
555
+ # Log GPU memory usage occasionally
556
+ if idx % 50 == 0 and torch.cuda.is_available():
557
+ log.info(f"[GPU] Memory allocated: {torch.cuda.memory_allocated()/1024**2:.1f}MB, "
558
+ f"Cached: {torch.cuda.memory_reserved()/1024**2:.1f}MB, "
559
+ f"Avg frame time: {sum(frame_times)/len(frame_times)*1000:.1f}ms")
560
+
561
+ # Compose output frames
562
+ alpha_u8 = (alpha_hw * 255.0 + 0.5).astype(np.uint8)
563
+ alpha_rgb = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
564
+ fg_bgr = (frame.astype(np.float32) * (alpha_hw[..., None] / 255.0)).astype(np.uint8)
565
+
566
+ # Write outputs
567
+ alpha_writer.write(alpha_rgb)
568
+ fg_writer.write(fg_bgr)
569
+ idx += 1
570
+
571
+ except Exception as e:
572
+ # Log detailed error information
573
+ error_msg = f"Error processing frame {idx+1}/{N}: {str(e)}"
574
+ log.error(error_msg, exc_info=True)
575
 
576
+ # Add GPU memory info if available
577
+ if torch.cuda.is_available():
578
+ mem_alloc = torch.cuda.memory_allocated() / 1024**2
579
+ mem_cached = torch.cuda.memory_reserved() / 1024**2
580
+ error_msg += (f"\nGPU Memory - Allocated: {mem_alloc:.1f}MB, "
581
+ f"Cached: {mem_cached:.1f}MB")
582
 
583
+ # Add frame processing stats
584
+ if self._frame_times:
585
+ avg_time = sum(self._frame_times) / len(self._frame_times)
586
+ error_msg += f"\nAvg frame time: {avg_time*1000:.1f}ms"
587
+
588
+ _emit_progress(progress_cb, -1, f"ERROR: {error_msg}")
589
+ raise MatAnyError(error_msg) from e
590
+
591
+ finally:
592
+ # Cleanup resources
593
+ try:
594
+ if 'cap' in locals() and cap.isOpened():
595
+ cap.release()
596
+ if 'alpha_writer' in locals() and alpha_writer is not None:
597
+ if hasattr(alpha_writer, 'isOpened') and alpha_writer.isOpened():
598
+ alpha_writer.release()
599
+ if 'fg_writer' in locals() and fg_writer is not None:
600
+ if hasattr(fg_writer, 'isOpened') and fg_writer.isOpened():
601
+ fg_writer.release()
602
+
603
+ # Log final stats
604
+ total_time = time.time() - start_time
605
+ fps = idx / total_time if total_time > 0 else 0
606
+
607
+ # Log GPU memory info if available
608
+ gpu_info = ""
609
+ if torch.cuda.is_available():
610
+ mem_alloc = torch.cuda.memory_allocated() / 1024**2
611
+ mem_cached = torch.cuda.memory_reserved() / 1024**2
612
+ gpu_info = f"\nGPU Memory - Allocated: {mem_alloc:.1f}MB, Cached: {mem_cached:.1f}MB"
613
+
614
+ log.info(
615
+ f"[MATANY] Processed {idx} frames in {total_time:.1f}s ({fps:.1f} FPS){gpu_info}"
616
+ )
617
+
618
+ # Validate outputs
619
+ _validate_nonempty(alpha_path)
620
+ _validate_nonempty(fg_path)
621
+
622
+ # Final progress update
623
+ _emit_progress(
624
+ progress_cb,
625
+ 1.0,
626
+ f"Complete! Processed {idx} frames at {fps:.1f} FPS{gpu_info}"
627
+ )
628
+
629
+ return alpha_path, fg_path
630
+
631
+ except Exception as e:
632
+ error_msg = f"Error during cleanup: {str(e)}"
633
+ log.error(error_msg, exc_info=True)
634
+ _emit_progress(progress_cb, -1, f"CLEANUP ERROR: {error_msg}")
635
+ raise MatAnyError(error_msg) from e
636
 
637
  def _flush_chunk(self, frames_bgr, seed_1hw, alpha_writer, fg_writer):
638
  """Process a chunk of frames with MatAnyone."""