MogensR committed on
Commit
f5cc616
·
1 Parent(s): 6ca9173

revelations

Browse files
Files changed (1) hide show
  1. models/matanyone_loader.py +138 -335
models/matanyone_loader.py CHANGED
@@ -1,30 +1,24 @@
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
- MatAnyone adapter — SAM2-seeded, streaming, build-agnostic.
5
 
6
- - SAM2 defines the subject (seed mask) on frame 0.
7
- - MatAnyone does frame-by-frame alpha matting.
8
- - Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
9
- - Falls back to process_frame([H,W,3]) if supported.
10
 
11
- Changes (2025-09-16):
12
- - Aligned with torch==2.3.1+cu121 and MatAnyone v1.0.0
13
- - Added shape logging in _call_step to verify 5D-to-4D squeeze
14
- - Set MATANY_FORCE_FORMAT=4d as default
15
- - Added VRAM logging in process_stream (MATANY_LOG_VRAM=1)
16
- - Enhanced _safe_empty_cache with memory_summary
17
- - Added MatAnyone version logging
18
- - Added MatAnyoneModel wrapper class for app_hf.py compatibility
19
  """
20
 
21
  from __future__ import annotations
22
  import os
23
- import cv2
24
  import time
25
  import logging
26
- import numpy as np
27
- import torch
28
  import importlib.metadata
29
  from pathlib import Path
30
  from typing import Optional, Callable, Tuple
@@ -64,13 +58,17 @@ class MatAnyError(RuntimeError):
64
  pass
65
 
66
  # ---------- CUDA helpers ----------
67
- def _cuda_snapshot(device: Optional[torch.device]) -> str:
68
  try:
 
69
  if not torch.cuda.is_available():
70
  return "CUDA: N/A"
71
  idx = 0
72
- if isinstance(device, torch.device) and device.type == "cuda" and device.index is not None:
73
- idx = device.index
 
 
 
74
  name = torch.cuda.get_device_name(idx)
75
  alloc = torch.cuda.memory_allocated(idx) / (1024**3)
76
  resv = torch.cuda.memory_reserved(idx) / (1024**3)
@@ -79,254 +77,48 @@ def _cuda_snapshot(device: Optional[torch.device]) -> str:
79
  return f"CUDA snapshot error: {e!r}"
80
 
81
  def _safe_empty_cache():
82
- if not torch.cuda.is_available():
83
- return
84
  try:
85
- log.info(f"[MATANY] CUDA memory before empty_cache: {_cuda_snapshot(None)}")
86
- torch.cuda.empty_cache()
87
- log.info(f"[MATANY] CUDA memory after empty_cache: {_cuda_snapshot(None)}")
88
- if os.getenv("MATANY_LOG_VRAM", "0") == "1":
89
- log.debug(f"[MATANY] VRAM summary:\n{torch.cuda.memory_summary()}")
90
  except Exception:
91
  pass
92
 
93
- # ---------- SAM2 → seed mask prep ----------
94
- def _prepare_seed_mask(sam2_mask: np.ndarray, H: int, W: int) -> np.ndarray:
95
- """
96
- Normalize to float32 [H,W] in {0,1}, white=FG.
97
- Auto-invert if >60% ON (likely wrong polarity).
98
- """
99
- if not isinstance(sam2_mask, np.ndarray):
100
- raise MatAnyError(f"SAM2 mask must be numpy array, got {type(sam2_mask)}")
101
- if sam2_mask.ndim == 3 and sam2_mask.shape[2] == 3:
102
- sam2_mask = cv2.cvtColor(sam2_mask, cv2.COLOR_BGR2GRAY)
103
- if sam2_mask.ndim != 2:
104
- raise MatAnyError(f"SAM2 mask must be 2D, got shape {sam2_mask.shape}")
105
-
106
- if sam2_mask.shape != (H, W):
107
- sam2_mask = cv2.resize(sam2_mask, (W, H), interpolation=cv2.INTER_NEAREST)
108
-
109
- m = sam2_mask.astype(np.float32)
110
- if m.max() > 1.0:
111
- m /= 255.0
112
- m = np.clip(m, 0.0, 1.0)
113
-
114
- if (m > 0.5).mean() > 0.60:
115
- m = 1.0 - m
116
-
117
- return (m > 0.5).astype(np.float32)
118
-
119
- # ---------- Frame conversion ----------
120
- def _frame_bgr_to_hwc_rgb_numpy(frame) -> np.ndarray:
121
- """Accept HWC/CHW BGR uint8 → return HWC RGB uint8."""
122
- if not isinstance(frame, np.ndarray) or frame.ndim != 3:
123
- raise MatAnyError(f"Frame must be HWC/CHW numpy array, got {type(frame)}, shape={getattr(frame, 'shape', None)}")
124
- arr = frame
125
- if arr.shape[0] == 3 and arr.shape[2] != 3: # CHW → HWC
126
- arr = np.transpose(arr, (1, 2, 0))
127
- if arr.dtype != np.uint8:
128
- raise MatAnyError(f"Frame must be uint8, got {arr.dtype}")
129
- return cv2.cvtColor(arr, cv2.COLOR_BGR2RGB)
130
-
131
  # ============================================================================
132
 
133
  class MatAnyoneSession:
134
  """
135
- Streaming wrapper that seeds MatAnyone on frame 0.
136
- Prefers step([B,C,H,W]) with T=1 squeeze patch for conv2d compatibility.
137
- Falls back to process_frame([H,W,3]) if supported.
138
  """
139
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
140
- from .matany_compat_patch import apply_matany_t1_squeeze_guard
141
-
142
- self.device = torch.device(device) if device else (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
143
  self.precision = precision.lower()
144
-
145
- # Apply T=1 squeeze patch
146
- if apply_matany_t1_squeeze_guard():
147
- log.info("[MATANY] T=1 squeeze patch applied for MatAnyone")
148
- else:
149
- log.warning("[MATANY] T=1 squeeze patch failed; conv2d errors may occur")
150
-
151
  # Log MatAnyone version
152
  try:
153
  version = importlib.metadata.version("matanyone")
154
  log.info(f"[MATANY] MatAnyone version: {version}")
155
  except Exception:
156
  log.info("[MATANY] MatAnyone version unknown")
157
-
158
- # API/format overrides for debugging
159
- api_force = os.getenv("MATANY_FORCE_API", "").strip().lower() # "process" or "step"
160
- fmt_force = os.getenv("MATANY_FORCE_FORMAT", "4d").strip().lower() # "4d" or "5d"
161
- self._force_api_process = (api_force == "process")
162
- self._force_api_step = (api_force == "step")
163
- self._force_4d = (fmt_force == "4d") or not fmt_force # Default to 4D
164
- self._force_5d = (fmt_force == "5d")
165
-
166
- try:
167
- from matanyone.inference.inference_core import InferenceCore
168
- except ImportError as e:
169
- raise MatAnyError(f"Failed to import MatAnyone: {e}")
170
  try:
171
- self.core = InferenceCore()
172
- except TypeError:
173
- self.core = InferenceCore("PeiqingYang/MatAnyone")
174
-
175
- self._has_process = hasattr(self.core, "process_frame")
176
- self._has_step = hasattr(self.core, "step")
177
- if not (self._has_process or self._has_step):
178
- raise MatAnyError("MatAnyone core exposes neither 'process_frame' nor 'step'")
179
-
180
- # Prefer step unless forced to process_frame
181
- if self._force_api_process and not self._has_process:
182
- raise MatAnyError("MATANY_FORCE_API=process but core.process_frame is missing")
183
- if self._force_api_step and not self._has_step:
184
- raise MatAnyError("MATANY_FORCE_API=step but core.step is missing")
185
-
186
- self._api = "process_frame" if (self._has_process and not self._force_api_step) else "step"
187
- self._use_5d = bool(self._force_5d) # Only for step mode; rarely needed post-patch
188
-
189
- log.info(f"[MATANY] APIs: process_frame={self._has_process}, step={self._has_step} | active={self._api} | force4d={self._force_4d} force5d={self._force_5d}")
190
-
191
- # AMP only affects step() path where we use torch tensors
192
- def _amp(self):
193
- if self.device.type != "cuda":
194
- return torch.amp.autocast(device_type="cuda", enabled=False)
195
- if self.precision == "fp32":
196
- return torch.amp.autocast(device_type="cuda", enabled=False)
197
- if self.precision == "fp16":
198
- return torch.amp.autocast(device_type="cuda", enabled=True, dtype=torch.float16)
199
- return torch.amp.autocast(device_type="cuda", enabled=True)
200
-
201
- # ----- Tensor builders for step() mode -----
202
- def _to_tensors(self, img_hwc_rgb: np.ndarray, mask_hw: Optional[np.ndarray]):
203
- img = torch.from_numpy(img_hwc_rgb).to(self.device)
204
- if img.dtype != torch.float32:
205
- img = img.float()
206
- if float(img.max().item()) > 1.0:
207
- img = img / 255.0
208
-
209
- img_chw = img.permute(2, 0, 1).contiguous() # [3,H,W]
210
- img_4d = img_chw.unsqueeze(0) # [1,3,H,W]
211
- img_5d = img_chw.unsqueeze(0).unsqueeze(0) # [1,1,3,H,W]
212
-
213
- mask_4d = mask_5d = None
214
- if mask_hw is not None:
215
- m = torch.from_numpy(mask_hw).to(self.device)
216
- if m.dtype != torch.float32:
217
- m = m.float()
218
- m = (m >= 0.5).float() if float(m.max().item()) <= 1.0 else (m >= 128).float()
219
- mask_4d = m.unsqueeze(0).unsqueeze(0).contiguous() # [1,1,H,W]
220
- mask_5d = mask_4d.unsqueeze(1).contiguous() # [1,1,1,H,W]
221
- return img_4d, img_5d, mask_4d, mask_5d
222
-
223
- # ----- Core call: process_frame fallback, step preferred -----
224
- def _call_process_frame(self, rgb_hwc: np.ndarray, seed_mask_hw: Optional[np.ndarray], is_first: bool):
225
- """Try numpy path first; fallback to torch path if the wheel requests tensors."""
226
- seed = seed_mask_hw if is_first else None
227
-
228
- # 1) Most wheels want numpy HWC + 2D mask (float 0..1 or uint8)
229
  try:
230
- return self.core.process_frame(rgb_hwc, seed)
231
- except TypeError as e_np:
232
- msg = str(e_np).lower()
233
- # 2) Some wheels want torch [B,C,H,W] tensors even in process_frame
234
- if "tensor" in msg or "expected" in msg or "conv2d" in msg:
235
- img_4d, _, mask_4d, _ = self._to_tensors(rgb_hwc, seed)
236
- with torch.no_grad(), self._amp():
237
- try:
238
- return self.core.process_frame(img_4d, mask_4d)
239
- except Exception as e_t:
240
- raise MatAnyError(f"process_frame tensor path failed: {e_t}") from e_t
241
- raise
242
-
243
- def _call_step(self, rgb_hwc: np.ndarray, seed_mask_hw: Optional[np.ndarray], is_first: bool):
244
- """Use 4D [B,C,H,W] by default; retry with 5D only if forced."""
245
- img_4d, img_5d, mask_4d, mask_5d = self._to_tensors(rgb_hwc, seed_mask_hw if is_first else None)
246
-
247
- def run(use_5d: bool):
248
- img = img_5d if use_5d else img_4d
249
- msk = mask_5d if use_5d else mask_4d
250
- log.debug(f"[MATANY] Step input: img={img.shape}, mask={msk.shape if msk is not None else None}, is_first={is_first}")
251
- if is_first and msk is not None:
252
- try:
253
- return self.core.step(img, msk, is_first=True)
254
- except TypeError:
255
- return self.core.step(img, msk)
256
- else:
257
- return self.core.step(img)
258
-
259
- with torch.no_grad(), self._amp():
260
- if self._force_4d:
261
- return run(False)
262
- if self._force_5d:
263
- return run(True)
264
-
265
- if self._use_5d:
266
- try:
267
- return run(True)
268
- except RuntimeError as e5:
269
- m5 = str(e5)
270
- if "expected 3d" in m5.lower() and "4d" in m5 and "conv2d" in m5.lower():
271
- log.info("[MATANY] 5D rejected by wheel (conv2d wants 3D/4D). Falling back to 4D.")
272
- self._use_5d = False
273
- return run(False)
274
- raise MatAnyError(f"Runtime error (step/5D): {m5}") from e5
275
-
276
- try:
277
- return run(False) # 4D
278
- except RuntimeError as e4:
279
- m4 = str(e4)
280
- needs_5d = any(kw in m4 for kw in ["expected 5D", "expects 5D", "input.dim() == 5", "but got 4D", "got input of size: [1, 3,"])
281
- if needs_5d:
282
- log.info("[MATANY] Wheel appears to expect 5D — retrying with [1,1,3,H,W] and [1,1,1,H,W].")
283
- self._use_5d = True
284
- try:
285
- return run(True)
286
- except RuntimeError as e5b:
287
- m5b = str(e5b)
288
- if "expected 3d" in m5b.lower() and "4d" in m5b and "conv2d" in m5b.lower():
289
- self._use_5d = False
290
- raise MatAnyError(f"Wheel ultimately expects 4D (conv2d). Original 4D error: {m4}") from e4
291
- raise MatAnyError(f"step/5D attempt failed: {m5b}") from e5b
292
- if "cuda" in m4.lower():
293
- snap = _cuda_snapshot(self.device)
294
- raise MatAnyError(f"CUDA runtime error: {m4} | {snap}") from e4
295
- raise MatAnyError(f"Runtime error (step/4D): {m4}") from e4
296
-
297
- # ----- Per-frame runner -----
298
- def _run_frame(self, frame_bgr: np.ndarray, sam2_mask_hw: Optional[np.ndarray], is_first: bool) -> np.ndarray:
299
- rgb_hwc = _frame_bgr_to_hwc_rgb_numpy(frame_bgr)
300
- H, W = rgb_hwc.shape[:2]
301
- seed_for_this_frame = _prepare_seed_mask(sam2_mask_hw, H, W) if (is_first and sam2_mask_hw is not None) else None
302
-
303
- # Primary: step (4D, post-patch); fallback to process_frame
304
- if self._api == "process_frame":
305
- try:
306
- out = self._call_process_frame(rgb_hwc, seed_for_this_frame, is_first)
307
- except Exception as e_proc:
308
- log.warning(f"[MATANY] process_frame failed ({e_proc}); falling back to step().")
309
- if not self._has_step:
310
- raise MatAnyError(f"process_frame failed and step() is unavailable: {e_proc}")
311
- self._api = "step"
312
- out = self._call_step(rgb_hwc, seed_for_this_frame, is_first)
313
- else:
314
- out = self._call_step(rgb_hwc, seed_for_this_frame, is_first)
315
-
316
- # Normalize to 2D alpha [H,W] in [0,1]
317
- if isinstance(out, torch.Tensor):
318
- alpha = out.detach().float().squeeze().cpu().numpy()
319
- else:
320
- alpha = np.asarray(out)
321
- alpha = alpha.astype(np.float32)
322
- if float(alpha.max()) > 1.0:
323
- alpha /= 255.0
324
- alpha = np.squeeze(alpha)
325
- if alpha.ndim != 2:
326
- raise MatAnyError(f"Expected 2D alpha matte; got shape {alpha.shape}")
327
- return np.clip(alpha, 0.0, 1.0)
328
-
329
- # ----- Public: streaming processor -----
330
  def process_stream(
331
  self,
332
  video_path: Path,
@@ -334,97 +126,73 @@ def process_stream(
334
  out_dir: Optional[Path] = None,
335
  progress_cb: Optional[Callable] = None,
336
  ) -> Tuple[Path, Path]:
 
 
 
 
 
 
 
 
 
 
 
 
337
  video_path = Path(video_path)
338
  if not video_path.exists():
339
  raise MatAnyError(f"Video file not found: {video_path}")
340
-
341
- out_dir = Path(out_dir) if out_dir else video_path.parent
 
 
 
342
  out_dir.mkdir(parents=True, exist_ok=True)
343
-
344
- cap_probe = cv2.VideoCapture(str(video_path))
345
- if not cap_probe.isOpened():
346
- raise MatAnyError(f"Failed to open video: {video_path}")
347
- N = int(cap_probe.get(cv2.CAP_PROP_FRAME_COUNT))
348
- fps = cap_probe.get(cv2.CAP_PROP_FPS)
349
- W = int(cap_probe.get(cv2.CAP_PROP_FRAME_WIDTH))
350
- H = int(cap_probe.get(cv2.CAP_PROP_FRAME_HEIGHT))
351
- cap_probe.release()
352
- if not fps or fps <= 0 or np.isnan(fps):
353
- fps = 25.0
354
-
355
- log.info(f"MatAnyone: {video_path.name} | {N} frames {W}x{H} @ {fps:.2f} fps")
356
- _emit_progress(progress_cb, 0.05, f"Video: {N} frames {W}x{H} @ {fps:.2f} fps")
357
- _emit_progress(progress_cb, 0.08, "Using per-frame processing")
358
-
359
- alpha_path = out_dir / "alpha.mp4"
360
- fg_path = out_dir / "fg.mp4"
361
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
362
- alpha_writer = cv2.VideoWriter(str(alpha_path), fourcc, fps, (W, H), True)
363
- fg_writer = cv2.VideoWriter(str(fg_path), fourcc, fps, (W, H), True)
364
- if not alpha_writer.isOpened() or not fg_writer.isOpened():
365
- raise MatAnyError("Failed to initialize VideoWriter(s)")
366
-
367
- seed_mask_np = None
368
- if seed_mask_path is not None:
369
- p = Path(seed_mask_path)
370
- if not p.exists():
371
- raise MatAnyError(f"Seed mask not found: {p}")
372
- m = cv2.imread(str(p), cv2.IMREAD_GRAYSCALE)
373
- if m is None:
374
- raise MatAnyError(f"Failed to read seed mask: {p}")
375
- seed_mask_np = m
376
-
377
- cap = cv2.VideoCapture(str(video_path))
378
- if not cap.isOpened():
379
- raise MatAnyError(f"Failed to open video for reading: {video_path}")
380
-
381
- idx = 0
382
- start = time.time()
383
- last_prog = start
384
  try:
385
- while True:
386
- ret, frame = cap.read()
387
- if not ret:
388
- break
389
- is_first = (idx == 0)
390
- alpha = self._run_frame(frame, seed_mask_np if is_first else None, is_first)
391
-
392
- alpha_u8 = (alpha * 255.0 + 0.5).astype(np.uint8)
393
- alpha_bgr = cv2.cvtColor(alpha_u8, cv2.COLOR_GRAY2BGR)
394
- fg_bgr = (frame.astype(np.float32) * alpha[..., None]).clip(0, 255).astype(np.uint8)
395
-
396
- alpha_writer.write(alpha_bgr)
397
- fg_writer.write(fg_bgr)
398
-
399
- idx += 1
400
- now = time.time()
401
- if now - last_prog >= 1.0:
402
- frac = idx / max(N, 1)
403
- _emit_progress(progress_cb, min(0.1 + 0.85 * frac, 0.95), f"MatAnyone: {idx}/{N} frames")
404
- last_prog = now
405
- if os.getenv("MATANY_LOG_VRAM", "0") == "1":
406
- log.debug(f"[MATANY] Frame {idx}/{N} VRAM: {_cuda_snapshot(self.device)}")
 
 
 
 
 
 
 
 
 
407
  except Exception as e:
408
- raise MatAnyError(f"Video processing failed: {e}") from e
 
 
409
  finally:
410
- try: cap.release()
411
- except: pass
412
- try: alpha_writer.release()
413
- except: pass
414
- try: fg_writer.release()
415
- except: pass
416
  _safe_empty_cache()
417
 
418
- if not alpha_path.exists() or alpha_path.stat().st_size == 0:
419
- raise MatAnyError(f"Output file missing/empty: {alpha_path}")
420
- if not fg_path.exists() or fg_path.stat().st_size == 0:
421
- raise MatAnyError(f"Output file missing/empty: {fg_path}")
422
-
423
- _emit_progress(progress_cb, 1.0, "MatAnyone: done")
424
- elapsed = time.time() - start
425
- log.info(f"MatAnyone completed: {idx} frames in {elapsed:.1f}s")
426
- return alpha_path, fg_path
427
-
428
  # ============================================================================
429
  # MatAnyoneModel Wrapper Class for app_hf.py compatibility
430
  # ============================================================================
@@ -463,8 +231,8 @@ def replace_background(self, video_path, masks, background_path):
463
  # Convert paths to Path objects
464
  video_path = Path(video_path)
465
 
466
- # Handle masks - for now, we'll use the session without a separate mask file
467
- # since MatAnyone expects SAM2 to provide the initial seed mask
468
 
469
  # Create output directory
470
  with tempfile.TemporaryDirectory() as temp_dir:
@@ -473,15 +241,50 @@ def replace_background(self, video_path, masks, background_path):
473
  # Process the video stream
474
  alpha_path, fg_path = self.session.process_stream(
475
  video_path=video_path,
476
- seed_mask_path=None, # We'll rely on SAM2 integration
477
  out_dir=output_dir,
478
  progress_cb=None
479
  )
480
 
481
- # For now, return the foreground video
482
  # In a full implementation, you'd composite with the background_path
483
  return str(fg_path)
484
 
485
  except Exception as e:
486
  log.error(f"Error in replace_background: {e}")
487
- raise MatAnyError(f"Background replacement failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  # -*- coding: utf-8 -*-
3
  """
4
+ MatAnyone adapter — Using Official API (File-Based)
5
 
6
+ Fixed to use MatAnyone's official process_video() API instead of
7
+ bypassing it with internal tensor manipulation. This eliminates
8
+ all 5D tensor dimension issues.
 
9
 
10
+ Changes (2025-09-17):
11
+ - Replaced custom tensor processing with official MatAnyone API
12
+ - Uses file-based input/output as designed by MatAnyone authors
13
+ - Eliminates all tensor dimension compatibility issues
14
+ - Simplified error handling and logging
 
 
 
15
  """
16
 
17
  from __future__ import annotations
18
  import os
 
19
  import time
20
  import logging
21
+ import tempfile
 
22
  import importlib.metadata
23
  from pathlib import Path
24
  from typing import Optional, Callable, Tuple
 
58
  pass
59
 
60
  # ---------- CUDA helpers ----------
61
+ def _cuda_snapshot(device: Optional[str]) -> str:
62
  try:
63
+ import torch
64
  if not torch.cuda.is_available():
65
  return "CUDA: N/A"
66
  idx = 0
67
+ if device and device.startswith("cuda:"):
68
+ try:
69
+ idx = int(device.split(":")[1])
70
+ except (ValueError, IndexError):
71
+ idx = 0
72
  name = torch.cuda.get_device_name(idx)
73
  alloc = torch.cuda.memory_allocated(idx) / (1024**3)
74
  resv = torch.cuda.memory_reserved(idx) / (1024**3)
 
77
  return f"CUDA snapshot error: {e!r}"
78
 
79
  def _safe_empty_cache():
 
 
80
  try:
81
+ import torch
82
+ if torch.cuda.is_available():
83
+ log.info(f"[MATANY] CUDA memory before empty_cache: {_cuda_snapshot('cuda:0')}")
84
+ torch.cuda.empty_cache()
85
+ log.info(f"[MATANY] CUDA memory after empty_cache: {_cuda_snapshot('cuda:0')}")
86
  except Exception:
87
  pass
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # ============================================================================
90
 
91
  class MatAnyoneSession:
92
  """
93
+ Simple wrapper around MatAnyone's official API.
94
+ Uses file-based input/output as designed by the MatAnyone authors.
 
95
  """
96
  def __init__(self, device: Optional[str] = None, precision: str = "auto"):
97
+ self.device = device or ("cuda" if self._cuda_available() else "cpu")
 
 
98
  self.precision = precision.lower()
99
+
 
 
 
 
 
 
100
  # Log MatAnyone version
101
  try:
102
  version = importlib.metadata.version("matanyone")
103
  log.info(f"[MATANY] MatAnyone version: {version}")
104
  except Exception:
105
  log.info("[MATANY] MatAnyone version unknown")
106
+
107
+ # Initialize MatAnyone's official API
 
 
 
 
 
 
 
 
 
 
 
108
  try:
109
+ from matanyone import InferenceCore
110
+ self.processor = InferenceCore("PeiqingYang/MatAnyone")
111
+ log.info("[MATANY] MatAnyone InferenceCore initialized successfully")
112
+ except Exception as e:
113
+ raise MatAnyError(f"Failed to initialize MatAnyone: {e}")
114
+
115
+ def _cuda_available(self) -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  try:
117
+ import torch
118
+ return torch.cuda.is_available()
119
+ except Exception:
120
+ return False
121
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def process_stream(
123
  self,
124
  video_path: Path,
 
126
  out_dir: Optional[Path] = None,
127
  progress_cb: Optional[Callable] = None,
128
  ) -> Tuple[Path, Path]:
129
+ """
130
+ Process video using MatAnyone's official API.
131
+
132
+ Args:
133
+ video_path: Path to input video file
134
+ seed_mask_path: Path to first-frame mask PNG (white=foreground, black=background)
135
+ out_dir: Output directory for results
136
+ progress_cb: Progress callback function
137
+
138
+ Returns:
139
+ Tuple of (alpha_path, foreground_path)
140
+ """
141
  video_path = Path(video_path)
142
  if not video_path.exists():
143
  raise MatAnyError(f"Video file not found: {video_path}")
144
+
145
+ if seed_mask_path and not Path(seed_mask_path).exists():
146
+ raise MatAnyError(f"Seed mask not found: {seed_mask_path}")
147
+
148
+ out_dir = Path(out_dir) if out_dir else video_path.parent / "matanyone_output"
149
  out_dir.mkdir(parents=True, exist_ok=True)
150
+
151
+ log.info(f"[MATANY] Processing video: {video_path}")
152
+ log.info(f"[MATANY] Using mask: {seed_mask_path}")
153
+ log.info(f"[MATANY] Output directory: {out_dir}")
154
+
155
+ _emit_progress(progress_cb, 0.0, "Initializing MatAnyone processing...")
156
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  try:
158
+ # Use MatAnyone's official API
159
+ start_time = time.time()
160
+
161
+ _emit_progress(progress_cb, 0.1, "Running MatAnyone video matting...")
162
+
163
+ # Call the official process_video method
164
+ foreground_path, alpha_path = self.processor.process_video(
165
+ input_path=str(video_path),
166
+ mask_path=str(seed_mask_path) if seed_mask_path else None,
167
+ output_path=str(out_dir)
168
+ )
169
+
170
+ processing_time = time.time() - start_time
171
+ log.info(f"[MATANY] Processing completed in {processing_time:.1f}s")
172
+ log.info(f"[MATANY] Foreground output: {foreground_path}")
173
+ log.info(f"[MATANY] Alpha output: {alpha_path}")
174
+
175
+ # Convert to Path objects
176
+ fg_path = Path(foreground_path) if foreground_path else None
177
+ al_path = Path(alpha_path) if alpha_path else None
178
+
179
+ # Verify outputs exist
180
+ if not fg_path or not fg_path.exists():
181
+ raise MatAnyError(f"Foreground output not created: {fg_path}")
182
+ if not al_path or not al_path.exists():
183
+ raise MatAnyError(f"Alpha output not created: {al_path}")
184
+
185
+ _emit_progress(progress_cb, 1.0, "MatAnyone processing complete")
186
+
187
+ return al_path, fg_path # Return (alpha, foreground) to match expected order
188
+
189
  except Exception as e:
190
+ log.error(f"[MATANY] Processing failed: {e}")
191
+ raise MatAnyError(f"MatAnyone processing failed: {e}")
192
+
193
  finally:
 
 
 
 
 
 
194
  _safe_empty_cache()
195
 
 
 
 
 
 
 
 
 
 
 
196
  # ============================================================================
197
  # MatAnyoneModel Wrapper Class for app_hf.py compatibility
198
  # ============================================================================
 
231
  # Convert paths to Path objects
232
  video_path = Path(video_path)
233
 
234
+ # For now, we expect masks to be a path to the first-frame mask
235
+ mask_path = Path(masks) if isinstance(masks, (str, Path)) else None
236
 
237
  # Create output directory
238
  with tempfile.TemporaryDirectory() as temp_dir:
 
241
  # Process the video stream
242
  alpha_path, fg_path = self.session.process_stream(
243
  video_path=video_path,
244
+ seed_mask_path=mask_path,
245
  out_dir=output_dir,
246
  progress_cb=None
247
  )
248
 
249
+ # Return the foreground video path
250
  # In a full implementation, you'd composite with the background_path
251
  return str(fg_path)
252
 
253
  except Exception as e:
254
  log.error(f"Error in replace_background: {e}")
255
+ raise MatAnyError(f"Background replacement failed: {e}")
256
+
257
+ # ============================================================================
258
+ # Helper function for pipeline integration
259
+ # ============================================================================
260
+
261
+ def create_matanyone_session(device=None):
262
+ """Create a MatAnyone session for use in pipeline"""
263
+ return MatAnyoneSession(device=device)
264
+
265
+ def run_matanyone_on_files(video_path, mask_path, output_dir, device="cuda", progress_callback=None):
266
+ """
267
+ Run MatAnyone on video and mask files.
268
+
269
+ Args:
270
+ video_path: Path to input video
271
+ mask_path: Path to first-frame mask PNG
272
+ output_dir: Directory for outputs
273
+ device: Device to use (cuda/cpu)
274
+ progress_callback: Progress callback function
275
+
276
+ Returns:
277
+ Tuple of (alpha_path, foreground_path) or (None, None) on failure
278
+ """
279
+ try:
280
+ session = MatAnyoneSession(device=device)
281
+ alpha_path, fg_path = session.process_stream(
282
+ video_path=Path(video_path),
283
+ seed_mask_path=Path(mask_path) if mask_path else None,
284
+ out_dir=Path(output_dir),
285
+ progress_cb=progress_callback
286
+ )
287
+ return str(alpha_path), str(fg_path)
288
+ except Exception as e:
289
+ log.error(f"MatAnyone processing failed: {e}")
290
+ return None, None