MogensR committed on
Commit
6a7c643
·
1 Parent(s): 4ae8fb1
Files changed (1) hide show
  1. pipeline.py +187 -4
pipeline.py CHANGED
@@ -12,6 +12,7 @@
12
  - Fallbacks: MediaPipe SelfieSegmentation → else OpenCV GrabCut
13
  - H.264 MP4 output (ffmpeg when available; OpenCV fallback)
14
  - Audio mux: original audio copied into final output (AAC) if present
 
15
 
16
  Environment knobs (all optional):
17
  - THIRD_PARTY_SAM2_DIR, THIRD_PARTY_MATANY_DIR
@@ -25,6 +26,8 @@
25
  - EDGE_ERODE=1, EDGE_DILATE=2, EDGE_BLUR=1.5
26
  - LIGHTWRAP_RADIUS=5, LIGHTWRAP_AMOUNT=0.18
27
  - DESPILL_AMOUNT=0.35
 
 
28
  """
29
 
30
  from __future__ import annotations
@@ -255,6 +258,152 @@ def _video_writer(out_path: Path, fps: int, size: Tuple[int, int]) -> cv2.VideoW
255
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
256
  return cv2.VideoWriter(str(out_path), fourcc, max(1, fps), size)
257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  # --------------------------------------------------------------------------------------
259
  # SAM2 Integration
260
  # --------------------------------------------------------------------------------------
@@ -292,7 +441,6 @@ def run_sam2_mask(predictor: object,
292
  if predictor is None:
293
  return None, False
294
  try:
295
- # --- begin try block body (ensure syntax is correct) ---
296
  rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
297
  predictor.set_image(rgb)
298
 
@@ -604,7 +752,7 @@ def process(video_path: Union[str, Path],
604
  point_y: Optional[float] = None,
605
  auto_box: bool = False,
606
  work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
607
- """Orchestrate: SAM2 mask → (optional GrabCut refine) → MatAnyone → composite → mux audio."""
608
  t0 = time.time()
609
  diagnostics: Dict[str, Any] = {
610
  "sam2_ok": False,
@@ -673,9 +821,44 @@ def process(video_path: Union[str, Path],
673
  else:
674
  ran = False
675
 
676
- output_path = tmp_root / "output.mp4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
 
678
- # 3) Composite
 
 
 
 
 
679
  if diagnostics["matany_ok"] and fg_path and al_path:
680
  ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
681
  if not ok_comp:
 
12
  - Fallbacks: MediaPipe SelfieSegmentation → else OpenCV GrabCut
13
  - H.264 MP4 output (ffmpeg when available; OpenCV fallback)
14
  - Audio mux: original audio copied into final output (AAC) if present
15
+ - NEW: Stage-A transparent export (VP9 with alpha or checkerboard preview)
16
 
17
  Environment knobs (all optional):
18
  - THIRD_PARTY_SAM2_DIR, THIRD_PARTY_MATANY_DIR
 
26
  - EDGE_ERODE=1, EDGE_DILATE=2, EDGE_BLUR=1.5
27
  - LIGHTWRAP_RADIUS=5, LIGHTWRAP_AMOUNT=0.18
28
  - DESPILL_AMOUNT=0.35
29
+ - RETURN_STAGE_A=0 | 1 (if 1, return Stage-A file instead of final composite)
30
+ - STAGEA_VP9_CRF=28 (quality for VP9 alpha export)
31
  """
32
 
33
  from __future__ import annotations
 
258
  fourcc = cv2.VideoWriter_fourcc(*"mp4v")
259
  return cv2.VideoWriter(str(out_path), fourcc, max(1, fps), size)
260
 
261
+ # --- Stage-A (transparent) builders ----------------------------------------------------
262
def _build_stage_a_rgba_vp9_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
    src_audio: Optional[Union[str, Path]] = None,
) -> bool:
    """Merge a foreground video and an alpha video into an RGBA WebM (VP9 with alpha).

    Both inputs are scaled to ``size`` and resampled to ``fps`` before being
    combined with ffmpeg's ``alphamerge`` filter. When ``src_audio`` is given,
    its first audio stream (if it has one) is encoded to Opus and muxed in.

    Args:
        fg_path: Foreground (colour) video.
        alpha_path: Alpha matte video; converted to grayscale by the filter graph.
        out_webm: Destination file (overwritten, ffmpeg ``-y``).
        fps: Output frame rate used in the filter graph.
        size: Output ``(width, height)``.
        src_audio: Optional media file to take the audio track from.

    Returns:
        True if ffmpeg completed successfully; False if ffmpeg is not
        available or the command failed (a warning is logged in that case).

    Environment:
        STAGEA_VP9_CRF: CRF value passed to libvpx-vp9 (default ``"28"``).
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(fg_path),       # input 0: FG video
            "-i", str(alpha_path),    # input 1: ALPHA video (grayscale)
        ]
        if src_audio:
            cmd += ["-i", str(src_audio)]  # input 2: original (audio source only)
        filter_graph = (
            f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
            f"[0:v]scale={w}:{h},fps={fps}[fg];"
            f"[fg][al]alphamerge[outv]"
        )
        cmd += ["-filter_complex", filter_graph, "-map", "[outv]"]
        if src_audio:
            # Trailing "?" makes the audio map optional, so a silent source is not an error.
            cmd += ["-map", "2:a:0?", "-c:a", "libopus", "-b:a", "128k"]
        cmd += [
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1",
            "-shortest",
            str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except subprocess.CalledProcessError as e:
        # ffmpeg explains failures on stderr; surface the tail instead of discarding it.
        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-2000:]
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}; ffmpeg stderr: {stderr_tail}")
        return False
    except Exception as e:
        # Best-effort stage: any other failure is logged and reported as False.
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}")
        return False
300
+
301
def _build_stage_a_rgba_vp9_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Merge the original video with one static mask image into an RGBA WebM (VP9 with alpha).

    The mask image is looped as a second ffmpeg input, converted to grayscale,
    and used as the alpha channel for every frame via ``alphamerge``. Both
    inputs are scaled to ``size`` and resampled to ``fps``. No audio is mapped.

    Args:
        video_path: Source video supplying the colour frames.
        mask_png: Mask image used as alpha for the whole clip.
        out_webm: Destination file (overwritten, ffmpeg ``-y``).
        fps: Output frame rate used in the filter graph.
        size: Output ``(width, height)``.

    Returns:
        True if ffmpeg completed successfully; False if ffmpeg is not
        available or the command failed (a warning is logged in that case).

    Environment:
        STAGEA_VP9_CRF: CRF value passed to libvpx-vp9 (default ``"28"``).
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        filter_graph = (
            f"[1:v]format=gray,scale={w}:{h},fps={fps}[al];"
            f"[0:v]scale={w}:{h},fps={fps}[fg];"
            f"[fg][al]alphamerge[outv]"
        )
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(video_path),               # input 0: original video
            "-loop", "1", "-i", str(mask_png),   # input 1: static PNG mask, looped
            "-filter_complex", filter_graph,
            "-map", "[outv]",
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1",
            "-shortest",
            str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except subprocess.CalledProcessError as e:
        # ffmpeg explains failures on stderr; surface the tail instead of discarding it.
        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-2000:]
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}; ffmpeg stderr: {stderr_tail}")
        return False
    except Exception as e:
        # Best-effort stage: any other failure is logged and reported as False.
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}")
        return False
333
+
334
def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
    """Return a gray checkerboard image for previews where real alpha is unavailable.

    Args:
        w: Image width in pixels.
        h: Image height in pixels.
        tile: Edge length of one square in pixels. Values below 1 are clamped
            to 1 so the tile index never divides by zero.

    Returns:
        ``uint8`` array of shape ``(h, w, 3)`` with three identical channels;
        squares alternate between 200 and 150, and the top-left square is 200.
    """
    tile = max(1, int(tile))
    # Broadcast two 1-D tile indices instead of materialising full h x w grids.
    col_tile = np.arange(w) // tile
    row_tile = np.arange(h) // tile
    parity = (row_tile[:, None] + col_tile[None, :]) % 2
    shade = np.where(parity == 0, 200, 150).astype(np.uint8)
    return np.stack([shade, shade, shade], axis=-1)
340
+
341
def _build_stage_a_checkerboard_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Write a preview MP4 of FG + ALPHA composited over a checkerboard (no real alpha).

    Frames are read in lockstep from both videos, resized to ``size``, and
    composited with ``_composite_frame_pro`` onto the checkerboard background.
    Reading stops as soon as either video runs out of frames.

    Args:
        fg_path: Foreground (colour) video.
        alpha_path: Alpha matte video; each frame is reduced to one gray channel.
        out_mp4: Destination file for the preview.
        fps: Frame rate passed to the video writer.
        size: Output ``(width, height)``.

    Returns:
        True if at least one frame was written; False if an input could not
        be opened, the writer could not be opened, or no frame pair was read.
    """
    w, h = size
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    writer = None
    ok_any = False
    try:
        # Checked inside the try so a half-opened pair is still released below.
        if not fg_cap.isOpened() or not al_cap.isOpened():
            return False
        writer = _video_writer(Path(out_mp4), fps, (w, h))
        if not writer.isOpened():
            # An unopened writer drops frames silently; do not report success.
            return False
        bg = _checkerboard_bg(w, h)
        while True:
            okf, fg = fg_cap.read()
            oka, al = al_cap.read()
            if not okf or not oka:
                break
            fg = cv2.resize(fg, (w, h))
            al = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
            # The compositor is given RGB; OpenCV reads and writes BGR.
            comp = _composite_frame_pro(cv2.cvtColor(fg, cv2.COLOR_BGR2RGB), al, bg)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        fg_cap.release()
        al_cap.release()
        if writer is not None:
            writer.release()
    return ok_any
373
+
374
def _build_stage_a_checkerboard_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Write a preview MP4 of the original video + one static mask over a checkerboard.

    The mask is loaded once as grayscale, resized to ``size`` with
    nearest-neighbour interpolation, and reused for every frame passed to
    ``_composite_frame_pro``.

    Args:
        video_path: Source video supplying the colour frames.
        mask_png: Mask image applied to every frame.
        out_mp4: Destination file for the preview.
        fps: Frame rate passed to the video writer.
        size: Output ``(width, height)``.

    Returns:
        True if at least one frame was written; False if the mask or video
        could not be read, the writer could not be opened, or the video
        yielded no frames.
    """
    w, h = size
    # Load the mask before opening the video so a bad mask leaves nothing to release.
    mask = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
    if mask is None:
        return False
    mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)

    cap = cv2.VideoCapture(str(video_path))
    writer = None
    ok_any = False
    try:
        if not cap.isOpened():
            return False
        writer = _video_writer(Path(out_mp4), fps, (w, h))
        if not writer.isOpened():
            # An unopened writer drops frames silently; do not report success.
            return False
        bg = _checkerboard_bg(w, h)
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame = cv2.resize(frame, (w, h))
            # The compositor is given RGB; OpenCV reads and writes BGR.
            comp = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), mask, bg)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        cap.release()
        if writer is not None:
            writer.release()
    return ok_any
406
+
407
  # --------------------------------------------------------------------------------------
408
  # SAM2 Integration
409
  # --------------------------------------------------------------------------------------
 
441
  if predictor is None:
442
  return None, False
443
  try:
 
444
  rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
445
  predictor.set_image(rgb)
446
 
 
752
  point_y: Optional[float] = None,
753
  auto_box: bool = False,
754
  work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
755
+ """Orchestrate: SAM2 mask → (optional GrabCut refine) → MatAnyone → Stage-A → composite → mux audio."""
756
  t0 = time.time()
757
  diagnostics: Dict[str, Any] = {
758
  "sam2_ok": False,
 
821
  else:
822
  ran = False
823
 
824
+ # --- Build Stage-A (transparent) file for inspection ---
825
+ stageA_path = None
826
+ stageA_ok = False
827
+ if diagnostics["matany_ok"] and fg_path and al_path:
828
+ stageA_path = tmp_root / "stageA_transparent.webm"
829
+ if _probe_ffmpeg():
830
+ stageA_ok = _build_stage_a_rgba_vp9_from_fg_alpha(
831
+ fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh), src_audio=video_path
832
+ )
833
+ if not stageA_ok:
834
+ stageA_path = tmp_root / "stageA_checkerboard.mp4"
835
+ stageA_ok = _build_stage_a_checkerboard_from_fg_alpha(
836
+ fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh)
837
+ )
838
+ else:
839
+ stageA_path = tmp_root / "stageA_transparent.webm"
840
+ if _probe_ffmpeg():
841
+ stageA_ok = _build_stage_a_rgba_vp9_from_mask(
842
+ video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
843
+ )
844
+ if not stageA_ok:
845
+ stageA_path = tmp_root / "stageA_checkerboard.mp4"
846
+ stageA_ok = _build_stage_a_checkerboard_from_mask(
847
+ video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
848
+ )
849
+
850
+ diagnostics["stageA_path"] = str(stageA_path) if stageA_ok else None
851
+ diagnostics["stageA_note"] = (
852
+ "WebM with real alpha (VP9)" if stageA_ok and str(stageA_path).endswith(".webm")
853
+ else ("MP4 checkerboard preview (no real alpha)" if stageA_ok else "Stage-A build failed")
854
+ )
855
 
856
+ # Optional: return Stage-A instead of final composite
857
+ if os.environ.get("RETURN_STAGE_A", "0").strip() == "1" and stageA_ok:
858
+ return str(stageA_path), diagnostics
859
+
860
+ # 3) Composite to final background
861
+ output_path = tmp_root / "output.mp4"
862
  if diagnostics["matany_ok"] and fg_path and al_path:
863
  ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
864
  if not ok_comp: