MogensR committed on
Commit
c01f936
·
1 Parent(s): bc45f7d

Update processing/video/video_processor.py

Browse files
Files changed (1) hide show
  1. processing/video/video_processor.py +30 -323
processing/video/video_processor.py CHANGED
@@ -1,327 +1,9 @@
1
- #!/usr/bin/env python3
2
- """
3
- Compatibility shim: CoreVideoProcessor (stabilized + crisper edges)
4
-
5
- - Accepts background configs:
6
- {"custom_path": "/path/to/image.png"}
7
- {"background_choice": "<preset_key>"}
8
- {"gradient": {type, start, end, angle_deg}}
9
- - Model-only downscale (max_model_size) for speed, full-res render.
10
- - FFmpeg pipe writer with encoder fallbacks and stderr surfacing; falls back
11
- to OpenCV VideoWriter if FFmpeg isn't available or fails mid-run.
12
- - Temporal smoothing + mask hardening to avoid flicker/ghosting.
13
- - Windowed two-phase execution (SAM2 window → release → MatAnyone window)
14
- to avoid GPU fragmentation/OOM on T4 (16GB).
15
-
16
- Requirements for the models provider:
17
- - get_sam2() -> predictor or None
18
- - get_matanyone() -> processor or None
19
- """
20
-
21
- from __future__ import annotations
22
-
23
- from dataclasses import dataclass
24
- from typing import Optional, Dict, Any, Callable, List, Tuple
25
- import os
26
- import time
27
- import threading
28
- import shutil
29
- import subprocess
30
- import shlex
31
-
32
- import cv2
33
- import numpy as np
34
-
35
- # Try project logger; fall back to std logging
36
- try:
37
- from utils.logging_setup import make_logger
38
- _log = make_logger(__name__)
39
- except Exception:
40
- import logging
41
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
42
- _log = logging.getLogger(__name__)
43
-
44
- # Import directly from utils.cv_processing to avoid circular imports via utils/__init__.py
45
- from utils.cv_processing import (
46
- segment_person_hq,
47
- refine_mask_hq,
48
- replace_background_hq,
49
- create_professional_background,
50
- validate_video_file,
51
- PROFESSIONAL_BACKGROUNDS,
52
- )
53
-
54
- # ---------- local gradient helper (no extra imports needed) ----------
55
- def _to_rgb(c):
56
- if isinstance(c, (list, tuple)) and len(c) == 3:
57
- return tuple(int(x) for x in c)
58
- if isinstance(c, str) and c.startswith("#") and len(c) == 7:
59
- return tuple(int(c[i:i+2], 16) for i in (1, 3, 5))
60
- return (255, 255, 255)
61
-
62
- def _create_gradient_background_local(spec: Dict[str, Any], width: int, height: int) -> np.ndarray:
63
- """
64
- Minimal gradient generator for backgrounds (linear with rotation).
65
- spec = {"type": "linear"|"radial"(ignored), "start": (r,g,b)|"#rrggbb", "end": ..., "angle_deg": float}
66
- Returns RGB np.uint8 (H,W,3)
67
- """
68
- start = _to_rgb(spec.get("start", "#222222"))
69
- end = _to_rgb(spec.get("end", "#888888"))
70
- angle = float(spec.get("angle_deg", 0))
71
-
72
- # build vertical gradient
73
- bg = np.zeros((height, width, 3), np.uint8)
74
- for y in range(height):
75
- t = y / max(1, height - 1)
76
- r = int(start[0]*(1-t) + end[0]*t)
77
- g = int(start[1]*(1-t) + end[1]*t)
78
- b = int(start[2]*(1-t) + end[2]*t)
79
- bg[y, :] = (r, g, b)
80
-
81
- if abs(angle) % 360 < 1e-6:
82
- return bg
83
-
84
- # rotate by angle using OpenCV (RGB-safe)
85
- center = (width / 2, height / 2)
86
- M = cv2.getRotationMatrix2D(center, angle, 1.0)
87
- rot = cv2.warpAffine(bg, M, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)
88
- return rot
89
-
90
-
91
- @dataclass
92
- class ProcessorConfig:
93
- background_preset: str = "office" # key in PROFESSIONAL_BACKGROUNDS
94
- write_fps: Optional[float] = None # None -> keep source fps
95
-
96
- # Model-only downscale (speedup without changing output resolution)
97
- max_model_size: Optional[int] = 1280
98
-
99
- # FFmpeg / NVENC output (pipe). If disabled or unavailable, use OpenCV writer.
100
- use_nvenc: bool = True
101
- nvenc_codec: str = "h264" # "h264" or "hevc"
102
- nvenc_preset: str = "p5" # NVENC preset string
103
- nvenc_cq: int = 18 # constant quality (lower = higher quality)
104
- nvenc_tune_hq: bool = True
105
- nvenc_pix_fmt: str = "yuv420p" # browser-safe
106
-
107
- # libx264 fallback
108
- x264_preset: str = "medium"
109
- x264_crf: int = 18
110
- x264_pix_fmt: str = "yuv420p"
111
-
112
- movflags_faststart: bool = True
113
-
114
- # ---------- stability & edge quality ----------
115
- temporal_ema_alpha: float = 0.75 # higher = calmer (0.6–0.85 typical)
116
- min_iou_to_accept: float = 0.05 # reject sudden mask jumps
117
- dilate_px: int = 6 # pad edges to keep hair/ears/shoulders
118
- edge_blur_px: int = 1 # tiny blur to calm edge shimmer
119
-
120
- # hardening (turn soft mask into crisper 0/1)
121
- hard_low: float = 0.35 # values below -> 0
122
- hard_high: float = 0.70 # values above -> 1
123
- mask_gamma: float = 0.90 # <1 boosts mid-tones slightly
124
-
125
- # ---------- windowed two-phase control ----------
126
- use_windowed: bool = True # enable two-phase SAM2→MatAnyone per chunk
127
- window_size: int = 8 # frames per window
128
-
129
- # Back-compat alias used elsewhere in the app
130
- ProcessingConfig = ProcessorConfig
131
-
132
-
133
- def _env_bool(name: str, default: bool) -> bool:
134
- v = os.environ.get(name, None)
135
- if v is None:
136
- return default
137
- return str(v).strip().lower() not in ("0", "no", "false", "off", "")
138
-
139
-
140
- def _env_int(name: str, default: int) -> int:
141
- try:
142
- return int(os.environ.get(name, "").strip() or default)
143
- except Exception:
144
- return default
145
-
146
-
147
- class _FFmpegPipe:
148
- """
149
- Wrapper around an FFmpeg stdin pipe with encoder fallbacks and good error messages.
150
- """
151
-
152
- def __init__(self, width: int, height: int, fps: float, out_path: str, cfg: ProcessorConfig, log=_log):
153
- self.width = int(width)
154
- self.height = int(height)
155
- self.fps = float(fps) if fps and fps > 0 else 25.0
156
- self.out_path = out_path
157
- self.cfg = cfg
158
- self.log = log
159
-
160
- self.proc: Optional[subprocess.Popen] = None
161
- self.encoder_used: Optional[str] = None
162
- self._stderr: bytes | None = None
163
-
164
- self._ffmpeg = shutil.which("ffmpeg")
165
- if not self._ffmpeg:
166
- raise RuntimeError("ffmpeg not found on PATH")
167
-
168
- self._start_with_fallbacks()
169
-
170
- def _cmd_for_encoder(self, encoder: str) -> list[str]:
171
- base = [
172
- self._ffmpeg,
173
- "-hide_banner", "-loglevel", "error",
174
- "-y",
175
- # rawvideo input from stdin
176
- "-f", "rawvideo",
177
- "-vcodec", "rawvideo",
178
- "-pix_fmt", "bgr24",
179
- "-s", f"{self.width}x{self.height}",
180
- "-r", f"{self.fps}",
181
- "-i", "-", # stdin
182
- "-an", # no audio here
183
- ]
184
- if self.cfg.movflags_faststart:
185
- base += ["-movflags", "+faststart"]
186
-
187
- if encoder == "h264_nvenc":
188
- base += [
189
- "-c:v", "h264_nvenc",
190
- "-preset", self.cfg.nvenc_preset,
191
- "-cq", str(int(self.cfg.nvenc_cq)),
192
- "-pix_fmt", self.cfg.nvenc_pix_fmt,
193
- ]
194
- if self.cfg.nvenc_tune_hq:
195
- base += ["-tune", "hq"]
196
- elif encoder == "hevc_nvenc":
197
- base += [
198
- "-c:v", "hevc_nvenc",
199
- "-preset", self.cfg.nvenc_preset,
200
- "-cq", str(int(self.cfg.nvenc_cq)),
201
- "-pix_fmt", self.cfg.nvenc_pix_fmt,
202
- ]
203
- if self.cfg.nvenc_tune_hq:
204
- base += ["-tune", "hq"]
205
- elif encoder == "libx264":
206
- base += [
207
- "-c:v", "libx264",
208
- "-preset", self.cfg.x264_preset,
209
- "-crf", str(int(self.cfg.x264_crf)),
210
- "-pix_fmt", self.cfg.x264_pix_fmt,
211
- ]
212
- elif encoder == "mpeg4":
213
- base += [
214
- "-c:v", "mpeg4",
215
- "-q:v", "2",
216
- "-pix_fmt", "yuv420p",
217
- ]
218
- else:
219
- base += ["-c:v", "libx264", "-preset", self.cfg.x264_preset, "-crf", str(int(self.cfg.x264_crf)), "-pix_fmt", self.cfg.x264_pix_fmt]
220
-
221
- base += [self.out_path]
222
- return base
223
-
224
- def _try_start(self, enc: str) -> bool:
225
- cmd = self._cmd_for_encoder(enc)
226
- try:
227
- self.proc = subprocess.Popen(
228
- cmd,
229
- stdin=subprocess.PIPE,
230
- stderr=subprocess.PIPE,
231
- bufsize=10**7,
232
- )
233
- self.encoder_used = enc
234
- self.log.info("FFmpeg started: %s", " ".join(shlex.quote(c) for c in cmd))
235
- # quick poll: if ffmpeg dies immediately, fail fast
236
- time.sleep(0.05)
237
- if self.proc.poll() is not None:
238
- self._stderr = self.proc.stderr.read() if self.proc.stderr else b""
239
- self.log.warning("FFmpeg exited on start with %s: %s", enc, (self._stderr or b"").decode(errors="ignore"))
240
- self.proc = None
241
- return False
242
- return True
243
- except Exception as e:
244
- self.log.warning("Failed to start FFmpeg with %s: %s", enc, e)
245
- self.proc = None
246
- return False
247
-
248
- def _start_with_fallbacks(self):
249
- encoders = []
250
- if self.cfg.use_nvenc:
251
- encoders += ["h264_nvenc"] if self.cfg.nvenc_codec.lower() == "h264" else ["hevc_nvenc"]
252
- encoders += ["libx264", "mpeg4"]
253
- for enc in encoders:
254
- if self._try_start(enc):
255
- return
256
- msg = "Could not start FFmpeg with any encoder (nvenc/libx264/mpeg4). Is ffmpeg present and codecs available?"
257
- if self._stderr:
258
- msg += f" Stderr: {(self._stderr or b'').decode(errors='ignore')[:500]}"
259
- raise RuntimeError(msg)
260
-
261
- def write(self, frame_bgr: np.ndarray):
262
- if self.proc is None or self.proc.stdin is None:
263
- raise RuntimeError("FFmpeg process is not running (stdin is None).")
264
- if not isinstance(frame_bgr, np.ndarray) or frame_bgr.dtype != np.uint8:
265
- raise ValueError("Frame must be a np.ndarray of dtype uint8.")
266
- if frame_bgr.ndim != 3 or frame_bgr.shape[2] != 3:
267
- raise ValueError("Frame must have shape (H, W, 3).")
268
- if frame_bgr.shape[0] != self.height or frame_bgr.shape[1] != self.width:
269
- raise ValueError(f"Frame size mismatch. Expected {self.width}x{self.height}, got {frame_bgr.shape[1]}x{frame_bgr.shape[0]}.")
270
-
271
- frame_bgr = np.ascontiguousarray(frame_bgr)
272
- try:
273
- self.proc.stdin.write(frame_bgr.tobytes())
274
- except Exception as e:
275
- stderr = b""
276
- try:
277
- if self.proc and self.proc.stderr:
278
- stderr = self.proc.stderr.read()
279
- except Exception:
280
- pass
281
- msg = f"FFmpeg pipe write failed: {e}"
282
- if stderr:
283
- msg += f"\nffmpeg stderr: {(stderr or b'').decode(errors='ignore')[:1000]}"
284
- raise BrokenPipeError(msg)
285
-
286
- def close(self):
287
- if self.proc is None:
288
- return
289
- try:
290
- if self.proc.stdin:
291
- try:
292
- self.proc.stdin.flush()
293
- except Exception:
294
- pass
295
- try:
296
- self.proc.stdin.close()
297
- except Exception:
298
- pass
299
- if self.proc.stderr:
300
- try:
301
- err = self.proc.stderr.read()
302
- if err:
303
- self.log.debug("FFmpeg stderr (tail): %s", err.decode(errors="ignore")[-2000:])
304
- except Exception:
305
- pass
306
- self.proc.wait(timeout=10)
307
- except Exception:
308
- try:
309
- self.proc.kill()
310
- except Exception:
311
- pass
312
- finally:
313
- self.proc = None
314
-
315
 
316
  class CoreVideoProcessor:
317
  """
318
  Minimal, safe implementation used by core/app.py.
319
- It relies on a models provider (e.g., ModelLoader) that implements:
320
- - get_sam2()
321
- - get_matanyone()
322
- and uses utils.cv_processing for the pipeline.
323
-
324
- Supports progress callback and cancellation via stop_event.
325
  """
326
 
327
  def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
@@ -331,6 +13,7 @@ def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[An
331
  if self.models is None:
332
  self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
333
  self._ffmpeg = shutil.which("ffmpeg")
 
334
 
335
  # -------- Back-compat safe config flags (do not require attrs on user config)
336
  self._use_windowed = _env_bool(
@@ -350,6 +33,33 @@ def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[An
350
  self._chunk_size = 12
351
  self._chunk_idx = 0
352
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  # ---------- mask post-processing (stability + crispness) ----------
354
  def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
355
  a_bin = (a >= thr).astype(np.uint8)
@@ -826,6 +536,3 @@ def process_video(
826
  "output_path": output_path,
827
  }
828
 
829
-
830
- # Backward-compat alias used elsewhere
831
- VideoProcessor = CoreVideoProcessor
 
1
+ # ... (all your imports and CoreVideoProcessor class header/attributes as above)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  class CoreVideoProcessor:
4
  """
5
  Minimal, safe implementation used by core/app.py.
6
+ ...
 
 
 
 
 
7
  """
8
 
9
  def __init__(self, config: Optional[ProcessorConfig] = None, models: Optional[Any] = None):
 
13
  if self.models is None:
14
  self.log.warning("CoreVideoProcessor initialized without a models provider; will use fallbacks.")
15
  self._ffmpeg = shutil.which("ffmpeg")
16
+ # (rest as before...)
17
 
18
  # -------- Back-compat safe config flags (do not require attrs on user config)
19
  self._use_windowed = _env_bool(
 
33
  self._chunk_size = 12
34
  self._chunk_idx = 0
35
 
36
+ # ---------------- ADDED METHOD ----------------
37
+ def prepare_background(self, background_choice: str, custom_background_path: Optional[str], width: int, height: int) -> np.ndarray:
38
+ """
39
+ Prepares a background image for compositing.
40
+ If a valid custom background path is given, loads and resizes it. Otherwise, uses a preset.
41
+ Returns: np.ndarray RGB (H, W, 3) uint8
42
+ """
43
+ import cv2
44
+ from utils.cv_processing import create_professional_background
45
+
46
+ if custom_background_path:
47
+ try:
48
+ img = cv2.imread(custom_background_path, cv2.IMREAD_COLOR)
49
+ if img is not None:
50
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
51
+ img = cv2.resize(img, (width, height), interpolation=cv2.INTER_LANCZOS4)
52
+ return img
53
+ else:
54
+ self.log.warning(f"Failed to load custom background from '{custom_background_path}', using preset.")
55
+ except Exception as e:
56
+ self.log.warning(f"Exception loading custom background: {e}, using preset.")
57
+
58
+ # fallback to preset
59
+ return create_professional_background(background_choice, width, height)
60
+
61
+ # (rest of class unchanged...)
62
+
63
  # ---------- mask post-processing (stability + crispness) ----------
64
  def _iou(self, a: np.ndarray, b: np.ndarray, thr: float = 0.5) -> float:
65
  a_bin = (a >= thr).astype(np.uint8)
 
536
  "output_path": output_path,
537
  }
538