fix 10
Browse files- pipeline.py +187 -4
pipeline.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
| 12 |
- Fallbacks: MediaPipe SelfieSegmentation → else OpenCV GrabCut
|
| 13 |
- H.264 MP4 output (ffmpeg when available; OpenCV fallback)
|
| 14 |
- Audio mux: original audio copied into final output (AAC) if present
|
|
|
|
| 15 |
|
| 16 |
Environment knobs (all optional):
|
| 17 |
- THIRD_PARTY_SAM2_DIR, THIRD_PARTY_MATANY_DIR
|
|
@@ -25,6 +26,8 @@
|
|
| 25 |
- EDGE_ERODE=1, EDGE_DILATE=2, EDGE_BLUR=1.5
|
| 26 |
- LIGHTWRAP_RADIUS=5, LIGHTWRAP_AMOUNT=0.18
|
| 27 |
- DESPILL_AMOUNT=0.35
|
|
|
|
|
|
|
| 28 |
"""
|
| 29 |
|
| 30 |
from __future__ import annotations
|
|
@@ -255,6 +258,152 @@ def _video_writer(out_path: Path, fps: int, size: Tuple[int, int]) -> cv2.VideoW
|
|
| 255 |
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 256 |
return cv2.VideoWriter(str(out_path), fourcc, max(1, fps), size)
|
| 257 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
# --------------------------------------------------------------------------------------
|
| 259 |
# SAM2 Integration
|
| 260 |
# --------------------------------------------------------------------------------------
|
|
@@ -292,7 +441,6 @@ def run_sam2_mask(predictor: object,
|
|
| 292 |
if predictor is None:
|
| 293 |
return None, False
|
| 294 |
try:
|
| 295 |
-
# --- begin try block body (ensure syntax is correct) ---
|
| 296 |
rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
|
| 297 |
predictor.set_image(rgb)
|
| 298 |
|
|
@@ -604,7 +752,7 @@ def process(video_path: Union[str, Path],
|
|
| 604 |
point_y: Optional[float] = None,
|
| 605 |
auto_box: bool = False,
|
| 606 |
work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
|
| 607 |
-
"""Orchestrate: SAM2 mask β (optional GrabCut refine) β MatAnyone β composite β mux audio."""
|
| 608 |
t0 = time.time()
|
| 609 |
diagnostics: Dict[str, Any] = {
|
| 610 |
"sam2_ok": False,
|
|
@@ -673,9 +821,44 @@ def process(video_path: Union[str, Path],
|
|
| 673 |
else:
|
| 674 |
ran = False
|
| 675 |
|
| 676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
|
| 678 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
if diagnostics["matany_ok"] and fg_path and al_path:
|
| 680 |
ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
|
| 681 |
if not ok_comp:
|
|
|
|
| 12 |
- Fallbacks: MediaPipe SelfieSegmentation → else OpenCV GrabCut
|
| 13 |
- H.264 MP4 output (ffmpeg when available; OpenCV fallback)
|
| 14 |
- Audio mux: original audio copied into final output (AAC) if present
|
| 15 |
+
- NEW: Stage-A transparent export (VP9 with alpha or checkerboard preview)
|
| 16 |
|
| 17 |
Environment knobs (all optional):
|
| 18 |
- THIRD_PARTY_SAM2_DIR, THIRD_PARTY_MATANY_DIR
|
|
|
|
| 26 |
- EDGE_ERODE=1, EDGE_DILATE=2, EDGE_BLUR=1.5
|
| 27 |
- LIGHTWRAP_RADIUS=5, LIGHTWRAP_AMOUNT=0.18
|
| 28 |
- DESPILL_AMOUNT=0.35
|
| 29 |
+
- RETURN_STAGE_A=0 | 1 (if 1, return Stage-A file instead of final composite)
|
| 30 |
+
- STAGEA_VP9_CRF=28 (quality for VP9 alpha export)
|
| 31 |
"""
|
| 32 |
|
| 33 |
from __future__ import annotations
|
|
|
|
| 258 |
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
|
| 259 |
return cv2.VideoWriter(str(out_path), fourcc, max(1, fps), size)
|
| 260 |
|
| 261 |
+
# --- Stage-A (transparent) builders ----------------------------------------------------
|
| 262 |
+
def _build_stage_a_rgba_vp9_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
    src_audio: Optional[Union[str, Path]] = None,
) -> bool:
    """Merge FG+ALPHA → RGBA WebM (VP9 with alpha). Optionally mux original audio (Opus).

    Args:
        fg_path: Foreground video (ffmpeg input 0).
        alpha_path: Alpha matte video (ffmpeg input 1); converted to gray in the filter graph.
        out_webm: Destination file; overwritten if present (``-y``).
        fps: Output frame rate. Clamped to at least 1, matching ``_video_writer``.
        size: ``(width, height)`` both inputs are scaled to before merging.
        src_audio: Optional file whose first audio stream is encoded to Opus.
            The stream is mapped as optional (``2:a:0?``), so a source without
            audio does not fail the command.

    Returns:
        True when ffmpeg exits successfully; False when ffmpeg is unavailable
        or the command fails for any reason (the failure is logged, not raised).
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        # Same lower bound as _video_writer; "fps=0" is not a valid filter argument.
        rate = max(1, fps)

        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(fg_path),      # 0: FG video
            "-i", str(alpha_path),   # 1: ALPHA video (grayscale)
        ]
        if src_audio:
            cmd += ["-i", str(src_audio)]  # 2: original (for audio)

        filter_graph = (
            f"[1:v]format=gray,scale={w}:{h},fps={rate}[al];"
            f"[0:v]scale={w}:{h},fps={rate}[fg];"
            "[fg][al]alphamerge[outv]"
        )
        cmd += ["-filter_complex", filter_graph, "-map", "[outv]"]
        if src_audio:
            cmd += ["-map", "2:a:0?", "-c:a", "libopus", "-b:a", "128k"]
        cmd += [
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1",
            "-shortest",
            str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; the actual reason is in the captured stderr.
        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-2000:]
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}; ffmpeg stderr: {stderr_tail}")
        return False
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) build failed: {e}")
        return False
|
| 300 |
+
|
| 301 |
+
def _build_stage_a_rgba_vp9_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_webm: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Merge original video + static mask → RGBA WebM (VP9 with alpha).

    Args:
        video_path: Original video (ffmpeg input 0); supplies the colour planes.
        mask_png: Still mask image (ffmpeg input 1); looped and converted to gray
            so it can serve as the alpha plane for every frame.
        out_webm: Destination file; overwritten if present (``-y``).
        fps: Output frame rate. Clamped to at least 1, matching ``_video_writer``.
        size: ``(width, height)`` both inputs are scaled to before merging.

    Returns:
        True when ffmpeg exits successfully; False when ffmpeg is unavailable
        or the command fails for any reason (the failure is logged, not raised).
        No audio stream is mapped by this variant.
    """
    if not _probe_ffmpeg():
        return False
    w, h = size
    try:
        # Same lower bound as _video_writer; "fps=0" is not a valid filter argument.
        rate = max(1, fps)

        filter_graph = (
            f"[1:v]format=gray,scale={w}:{h},fps={rate}[al];"
            f"[0:v]scale={w}:{h},fps={rate}[fg];"
            "[fg][al]alphamerge[outv]"
        )
        cmd = [
            _ffmpeg_bin(), "-y",
            "-i", str(video_path),               # 0: original video
            "-loop", "1", "-i", str(mask_png),   # 1: static PNG mask (grayscale)
            "-filter_complex", filter_graph,
            "-map", "[outv]",
            "-c:v", "libvpx-vp9", "-pix_fmt", "yuva420p",
            "-crf", os.environ.get("STAGEA_VP9_CRF", "28"),
            "-b:v", "0", "-row-mt", "1",
            "-shortest",
            str(out_webm),
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return True
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit status; the actual reason is in the captured stderr.
        stderr_tail = (e.stderr or b"").decode("utf-8", errors="replace").strip()[-2000:]
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}; ffmpeg stderr: {stderr_tail}")
        return False
    except Exception as e:
        logger.warning(f"Stage-A VP9(alpha) (mask) build failed: {e}")
        return False
|
| 333 |
+
|
| 334 |
+
def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
    """RGB checkerboard (for preview when no real alpha is possible).

    Args:
        w: Image width in pixels.
        h: Image height in pixels.
        tile: Edge length of one square in pixels; values below 1 are treated as 1.

    Returns:
        ``uint8`` array of shape ``(h, w, 3)``. Squares alternate between gray
        levels 200 and 150, with the top-left square at 200.
    """
    tile = max(1, int(tile))  # guard against division by zero / negative tiles

    # Two 1-D index vectors broadcast to the (h, w) parity grid; this avoids
    # materialising the two full-size coordinate arrays np.mgrid would build.
    row_cell = np.arange(h) // tile
    col_cell = np.arange(w) // tile
    parity = (row_cell[:, None] + col_cell[None, :]) % 2

    gray = np.where(parity == 0, 200, 150).astype(np.uint8)
    return np.stack([gray, gray, gray], axis=-1)
|
| 340 |
+
|
| 341 |
+
def _build_stage_a_checkerboard_from_fg_alpha(
    fg_path: Union[str, Path],
    alpha_path: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: FG+ALPHA over checkerboard → MP4 (no real alpha).

    Reads the foreground and alpha videos in lockstep, composites each pair
    over a checkerboard via ``_composite_frame_pro`` and writes the result
    with ``_video_writer``. Stops at the first frame either input cannot supply.

    Args:
        fg_path: Foreground video.
        alpha_path: Alpha matte video; each frame is reduced to one gray channel.
        out_mp4: Destination file.
        fps: Output frame rate, passed through to ``_video_writer``.
        size: ``(width, height)`` every frame is resized to.

    Returns:
        True if at least one frame was written; False if an input or the
        writer could not be opened, or no frame pair could be read.
    """
    fg_cap = cv2.VideoCapture(str(fg_path))
    al_cap = cv2.VideoCapture(str(alpha_path))
    writer = None
    ok_any = False
    try:
        # Checked inside the try so both captures are released even on early exit.
        if not fg_cap.isOpened() or not al_cap.isOpened():
            return False
        w, h = size
        writer = _video_writer(Path(out_mp4), fps, (w, h))
        if not writer.isOpened():
            # write() on an unopened writer does nothing, which would report false success.
            logger.warning(f"Stage-A checkerboard build failed: cannot open writer for {out_mp4}")
            return False
        bg = _checkerboard_bg(w, h)
        while True:
            okf, fg = fg_cap.read()
            oka, al = al_cap.read()
            if not okf or not oka:
                break
            fg = cv2.resize(fg, (w, h))
            al = cv2.cvtColor(cv2.resize(al, (w, h)), cv2.COLOR_BGR2GRAY)
            # The compositor is fed RGB; its output is converted back to BGR for the writer.
            comp = _composite_frame_pro(cv2.cvtColor(fg, cv2.COLOR_BGR2RGB), al, bg)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        fg_cap.release()
        al_cap.release()
        if writer is not None:
            writer.release()
    return ok_any
|
| 373 |
+
|
| 374 |
+
def _build_stage_a_checkerboard_from_mask(
    video_path: Union[str, Path],
    mask_png: Union[str, Path],
    out_mp4: Union[str, Path],
    fps: int,
    size: Tuple[int, int],
) -> bool:
    """Preview: original video + static mask over checkerboard → MP4.

    The same mask image is applied to every frame of the video; each frame is
    composited over a checkerboard via ``_composite_frame_pro`` and written
    with ``_video_writer``.

    Args:
        video_path: Original video.
        mask_png: Mask image, loaded as a single gray channel.
        out_mp4: Destination file.
        fps: Output frame rate, passed through to ``_video_writer``.
        size: ``(width, height)`` the frames and the mask are resized to.

    Returns:
        True if at least one frame was written; False if the video, the mask
        or the writer could not be opened, or the video yielded no frames.
    """
    cap = cv2.VideoCapture(str(video_path))
    writer = None
    ok_any = False
    try:
        # Checked inside the try so the capture is released even on early exit.
        if not cap.isOpened():
            return False
        w, h = size
        mask = cv2.imread(str(mask_png), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            return False
        # Nearest-neighbour keeps the mask's original values when rescaling.
        mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
        writer = _video_writer(Path(out_mp4), fps, (w, h))
        if not writer.isOpened():
            # write() on an unopened writer does nothing, which would report false success.
            logger.warning(f"Stage-A checkerboard (mask) build failed: cannot open writer for {out_mp4}")
            return False
        bg = _checkerboard_bg(w, h)
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            frame = cv2.resize(frame, (w, h))
            # The compositor is fed RGB; its output is converted back to BGR for the writer.
            comp = _composite_frame_pro(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), mask, bg)
            writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
            ok_any = True
    finally:
        cap.release()
        if writer is not None:
            writer.release()
    return ok_any
|
| 406 |
+
|
| 407 |
# --------------------------------------------------------------------------------------
|
| 408 |
# SAM2 Integration
|
| 409 |
# --------------------------------------------------------------------------------------
|
|
|
|
| 441 |
if predictor is None:
|
| 442 |
return None, False
|
| 443 |
try:
|
|
|
|
| 444 |
rgb = cv2.cvtColor(first_frame_bgr, cv2.COLOR_BGR2RGB)
|
| 445 |
predictor.set_image(rgb)
|
| 446 |
|
|
|
|
| 752 |
point_y: Optional[float] = None,
|
| 753 |
auto_box: bool = False,
|
| 754 |
work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
|
| 755 |
+
"""Orchestrate: SAM2 mask β (optional GrabCut refine) β MatAnyone β Stage-A β composite β mux audio."""
|
| 756 |
t0 = time.time()
|
| 757 |
diagnostics: Dict[str, Any] = {
|
| 758 |
"sam2_ok": False,
|
|
|
|
| 821 |
else:
|
| 822 |
ran = False
|
| 823 |
|
| 824 |
+
# --- Build Stage-A (transparent) file for inspection ---
|
| 825 |
+
stageA_path = None
|
| 826 |
+
stageA_ok = False
|
| 827 |
+
if diagnostics["matany_ok"] and fg_path and al_path:
|
| 828 |
+
stageA_path = tmp_root / "stageA_transparent.webm"
|
| 829 |
+
if _probe_ffmpeg():
|
| 830 |
+
stageA_ok = _build_stage_a_rgba_vp9_from_fg_alpha(
|
| 831 |
+
fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh), src_audio=video_path
|
| 832 |
+
)
|
| 833 |
+
if not stageA_ok:
|
| 834 |
+
stageA_path = tmp_root / "stageA_checkerboard.mp4"
|
| 835 |
+
stageA_ok = _build_stage_a_checkerboard_from_fg_alpha(
|
| 836 |
+
fg_path, al_path, stageA_path, diagnostics["fps"], (vw, vh)
|
| 837 |
+
)
|
| 838 |
+
else:
|
| 839 |
+
stageA_path = tmp_root / "stageA_transparent.webm"
|
| 840 |
+
if _probe_ffmpeg():
|
| 841 |
+
stageA_ok = _build_stage_a_rgba_vp9_from_mask(
|
| 842 |
+
video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
|
| 843 |
+
)
|
| 844 |
+
if not stageA_ok:
|
| 845 |
+
stageA_path = tmp_root / "stageA_checkerboard.mp4"
|
| 846 |
+
stageA_ok = _build_stage_a_checkerboard_from_mask(
|
| 847 |
+
video_path, mask_png, stageA_path, diagnostics["fps"], (vw, vh)
|
| 848 |
+
)
|
| 849 |
+
|
| 850 |
+
diagnostics["stageA_path"] = str(stageA_path) if stageA_ok else None
|
| 851 |
+
diagnostics["stageA_note"] = (
|
| 852 |
+
"WebM with real alpha (VP9)" if stageA_ok and str(stageA_path).endswith(".webm")
|
| 853 |
+
else ("MP4 checkerboard preview (no real alpha)" if stageA_ok else "Stage-A build failed")
|
| 854 |
+
)
|
| 855 |
|
| 856 |
+
# Optional: return Stage-A instead of final composite
|
| 857 |
+
if os.environ.get("RETURN_STAGE_A", "0").strip() == "1" and stageA_ok:
|
| 858 |
+
return str(stageA_path), diagnostics
|
| 859 |
+
|
| 860 |
+
# 3) Composite to final background
|
| 861 |
+
output_path = tmp_root / "output.mp4"
|
| 862 |
if diagnostics["matany_ok"] and fg_path and al_path:
|
| 863 |
ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
|
| 864 |
if not ok_comp:
|