devkunalnaik committed on
Commit
6faf48e
·
1 Parent(s): d5e1b6d

Perf: source face detected once, target faces cached every 5 frames, 720p video processing

Browse files
processors/face_swap.py CHANGED
@@ -228,3 +228,64 @@ class FaceSwapper:
228
 
229
  except Exception as exc:
230
  return None, f"Face swap error: {exc}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  except Exception as exc:
230
  return None, f"Face swap error: {exc}"
231
+
232
+ def get_source_face(self, source_bgr: np.ndarray):
233
+ """
234
+ Detect and return the first face in *source_bgr*.
235
+ Call once before a video loop and reuse the result in swap_frame().
236
+
237
+ Returns:
238
+ face object or None
239
+ """
240
+ self._init()
241
+ faces = self._app.get(source_bgr)
242
+ return faces[0] if faces else None
243
+
244
+ def swap_frame(
245
+ self,
246
+ target_bgr: np.ndarray,
247
+ source_face,
248
+ cached_target_faces=None,
249
+ enhance: bool = False,
250
+ ):
251
+ """
252
+ Fast path for video — reuses a pre-computed source_face and optionally
253
+ cached target faces (re-detection skipped when supplied).
254
+
255
+ Returns:
256
+ (result_bgr, target_faces_used)
257
+ """
258
+ self._init()
259
+
260
+ # Cap the longer frame side at 720 px for speed; quality still good for motion
261
+ MAX_VIDEO_DIM = 720
262
+ orig_h, orig_w = target_bgr.shape[:2]
263
+ scale_down = 1.0
264
+ if max(orig_h, orig_w) > MAX_VIDEO_DIM:
265
+ scale_down = MAX_VIDEO_DIM / max(orig_h, orig_w)
266
+ target_bgr = cv2.resize(
267
+ target_bgr,
268
+ (int(orig_w * scale_down), int(orig_h * scale_down)),
269
+ interpolation=cv2.INTER_LINEAR,
270
+ )
271
+
272
+ if cached_target_faces is None:
273
+ target_faces = self._app.get(target_bgr)
274
+ else:
275
+ target_faces = cached_target_faces
276
+
277
+ if not target_faces:
278
+ return None, []
279
+
280
+ result = target_bgr.copy()
281
+ for tgt_face in target_faces:
282
+ result = self._swapper.get(result, tgt_face, source_face, paste_back=True)
283
+
284
+ if enhance:
285
+ result = self._enhance_opencv(result, target_faces)
286
+
287
+ # Scale back up to original frame size
288
+ if scale_down < 1.0:
289
+ result = cv2.resize(result, (orig_w, orig_h), interpolation=cv2.INTER_LINEAR)
290
+
291
+ return result, target_faces
processors/video_processor.py CHANGED
@@ -2,8 +2,14 @@
2
  Video processor — extracts frames from an input video, applies face or body
3
  swap to each frame, then re-encodes the result with FFmpeg (audio preserved).
4
 
5
- A hard cap of MAX_FRAMES is enforced to keep processing times reasonable on
6
- free GPU tiers.
 
 
 
 
 
 
7
  """
8
 
9
  import cv2
@@ -12,7 +18,8 @@ import tempfile
12
  import numpy as np
13
  from pathlib import Path
14
 
15
- MAX_FRAMES = 600 # ~20 s at 30 fps — raise for paid/GPU tiers
 
16
 
17
 
18
  class VideoProcessor:
@@ -45,9 +52,9 @@ class VideoProcessor:
45
  if not cap.isOpened():
46
  return None, "Could not open video file."
47
 
48
- fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
49
- width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
50
- height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
51
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
52
 
53
  if total_frames > MAX_FRAMES:
@@ -58,14 +65,23 @@ class VideoProcessor:
58
  "Please trim the video and try again."
59
  )
60
 
 
 
 
 
 
 
 
 
61
  # Temp file for raw processed frames (mp4v codec)
62
  raw_out_path = tempfile.mktemp(suffix="_raw.mp4")
63
- fourcc = cv2.VideoWriter_fourcc(*"mp4v")
64
- writer = cv2.VideoWriter(raw_out_path, fourcc, fps, (width, height))
65
 
66
- frame_idx = 0
67
- processed = 0
68
- errors = 0
 
69
 
70
  while True:
71
  ret, frame = cap.read()
@@ -78,10 +94,19 @@ class VideoProcessor:
78
  f"Processing frame {frame_idx + 1} / {total_frames}",
79
  )
80
 
81
- result_frame = self._process_frame(
82
- source_bgr, frame, mode, enhance, blend_strength
 
 
 
 
 
83
  )
84
 
 
 
 
 
85
  if result_frame is not None:
86
  writer.write(result_frame)
87
  processed += 1
@@ -97,7 +122,6 @@ class VideoProcessor:
97
  # Re-encode with H.264 and merge original audio via FFmpeg
98
  final_path = self._ffmpeg_encode(video_path, raw_out_path)
99
 
100
- # Clean up raw file
101
  try:
102
  os.unlink(raw_out_path)
103
  except OSError:
@@ -119,19 +143,27 @@ class VideoProcessor:
119
  mode: str,
120
  enhance: bool,
121
  blend_strength: float,
122
- ) -> np.ndarray | None:
 
 
 
123
  try:
124
  if mode == "face" and self.face_swapper:
125
- result, _ = self.face_swapper.swap(source_bgr, frame, enhance=enhance)
126
- return result
 
 
 
 
 
127
  elif mode == "body" and self.body_swapper:
128
  result, _ = self.body_swapper.swap(
129
  source_bgr, frame, blend_strength=blend_strength
130
  )
131
- return result
132
  except Exception as e:
133
  print(f"[VideoProcessor] Frame error: {e}")
134
- return None
135
 
136
  @staticmethod
137
  def _ffmpeg_encode(original_video_path: str, processed_raw_path: str) -> str:
 
2
  Video processor — extracts frames from an input video, applies face or body
3
  swap to each frame, then re-encodes the result with FFmpeg (audio preserved).
4
 
5
+ Speed optimisations
6
+ -------------------
7
+ * Source face is detected **once** before the loop (never per-frame).
8
+ * Target face detection is cached and reused for DET_INTERVAL frames — faces
9
+ don't move much between consecutive frames at normal frame rates.
10
+ * Video frames are capped at 720 px on the longer side for processing (upscaled back for writing).
11
+ * A hard cap of MAX_FRAMES is enforced to keep processing times reasonable on
12
+ free CPU tiers.
13
  """
14
 
15
  import cv2
 
18
  import numpy as np
19
  from pathlib import Path
20
 
21
+ MAX_FRAMES = 600 # ~20 s at 30 fps
22
+ DET_INTERVAL = 5 # re-detect target faces every N frames
23
 
24
 
25
  class VideoProcessor:
 
52
  if not cap.isOpened():
53
  return None, "Could not open video file."
54
 
55
+ fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
56
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
57
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
58
  total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
59
 
60
  if total_frames > MAX_FRAMES:
 
65
  "Please trim the video and try again."
66
  )
67
 
68
+ # ── Pre-compute source face once (big win for face-swap mode) ─────────
69
+ source_face = None
70
+ if mode == "face" and self.face_swapper:
71
+ source_face = self.face_swapper.get_source_face(source_bgr)
72
+ if source_face is None:
73
+ cap.release()
74
+ return None, "No face detected in source image."
75
+
76
  # Temp file for raw processed frames (mp4v codec)
77
  raw_out_path = tempfile.mktemp(suffix="_raw.mp4")
78
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
79
+ writer = cv2.VideoWriter(raw_out_path, fourcc, fps, (width, height))
80
 
81
+ frame_idx = 0
82
+ processed = 0
83
+ errors = 0
84
+ cached_tgt_faces = None # reused across DET_INTERVAL frames
85
 
86
  while True:
87
  ret, frame = cap.read()
 
94
  f"Processing frame {frame_idx + 1} / {total_frames}",
95
  )
96
 
97
+ # Only re-detect target faces every DET_INTERVAL frames
98
+ use_cache = (mode == "face") and (frame_idx % DET_INTERVAL != 0) and (cached_tgt_faces is not None)
99
+
100
+ result_frame, new_faces = self._process_frame(
101
+ source_bgr, frame, mode, enhance, blend_strength,
102
+ source_face=source_face,
103
+ cached_target_faces=cached_tgt_faces if use_cache else None,
104
  )
105
 
106
+ # Refresh cache after a detection frame
107
+ if mode == "face" and new_faces is not None:
108
+ cached_tgt_faces = new_faces if new_faces else cached_tgt_faces
109
+
110
  if result_frame is not None:
111
  writer.write(result_frame)
112
  processed += 1
 
122
  # Re-encode with H.264 and merge original audio via FFmpeg
123
  final_path = self._ffmpeg_encode(video_path, raw_out_path)
124
 
 
125
  try:
126
  os.unlink(raw_out_path)
127
  except OSError:
 
143
  mode: str,
144
  enhance: bool,
145
  blend_strength: float,
146
+ source_face=None,
147
+ cached_target_faces=None,
148
+ ):
149
+ """Returns (result_frame_or_None, detected_faces_or_None)."""
150
  try:
151
  if mode == "face" and self.face_swapper:
152
+ result, faces = self.face_swapper.swap_frame(
153
+ frame,
154
+ source_face,
155
+ cached_target_faces=cached_target_faces,
156
+ enhance=enhance,
157
+ )
158
+ return result, faces
159
  elif mode == "body" and self.body_swapper:
160
  result, _ = self.body_swapper.swap(
161
  source_bgr, frame, blend_strength=blend_strength
162
  )
163
+ return result, None
164
  except Exception as e:
165
  print(f"[VideoProcessor] Frame error: {e}")
166
+ return None, None
167
 
168
  @staticmethod
169
  def _ffmpeg_encode(original_video_path: str, processed_raw_path: str) -> str: