File size: 12,705 Bytes
acb9f1e
 
 
 
6faf48e
 
 
 
 
 
 
 
acb9f1e
 
 
 
 
 
 
 
6faf48e
47c8f24
acb9f1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
937ca9c
a5190e9
acb9f1e
 
 
 
a5190e9
 
acb9f1e
 
 
 
 
 
 
 
6faf48e
 
 
acb9f1e
 
a5190e9
 
 
 
 
acb9f1e
 
a5190e9
 
 
acb9f1e
 
6faf48e
 
 
 
 
 
 
 
de1deba
 
 
 
a5190e9
de1deba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5190e9
7f39f97
 
1c23235
 
 
7f39f97
 
1c23235
7f39f97
 
 
 
1c23235
acb9f1e
a5190e9
6faf48e
 
a5190e9
 
acb9f1e
a5190e9
 
 
 
 
 
 
 
 
 
 
 
 
de1deba
 
 
 
 
a5190e9
 
 
 
 
 
 
 
 
 
 
937ca9c
a5190e9
 
6faf48e
a5190e9
1c23235
 
 
 
a5190e9
 
 
 
1c23235
 
 
 
 
a5190e9
acb9f1e
a5190e9
6faf48e
a5190e9
 
acb9f1e
a5190e9
 
 
de1deba
 
 
 
 
acb9f1e
a5190e9
 
 
 
 
 
 
acb9f1e
 
a5190e9
 
 
acb9f1e
 
 
 
 
 
a5190e9
acb9f1e
a5190e9
 
 
acb9f1e
 
 
 
 
 
 
 
 
 
 
 
6faf48e
 
 
 
acb9f1e
 
6faf48e
 
 
 
 
 
 
acb9f1e
 
 
 
6faf48e
acb9f1e
 
6faf48e
acb9f1e
 
a5190e9
acb9f1e
7f39f97
 
 
acb9f1e
 
 
 
7f39f97
acb9f1e
7f39f97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c23235
7f39f97
1c23235
 
7f39f97
 
 
acb9f1e
 
7f39f97
acb9f1e
7f39f97
acb9f1e
7f39f97
 
 
 
 
acb9f1e
 
7f39f97
 
 
 
 
acb9f1e
7f39f97
acb9f1e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
"""
Video processor β€” extracts frames from an input video, applies face or body
swap to each frame, then re-encodes the result with FFmpeg (audio preserved).

Speed optimisations
-------------------
* Source face is detected **once** before the loop (never per-frame).
* Target face detection is cached and reused for DET_INTERVAL frames β€” faces
  don't move much between consecutive frames at normal frame rates.
* Video frames are capped at 720p for processing (upscaled back for writing).
* A hard cap of MAX_FRAMES is enforced to keep processing times reasonable on
  free CPU tiers.
"""

import cv2
import os
import tempfile
import numpy as np
from pathlib import Path

# Maximum number of frames process_video() will handle in a single call.
MAX_FRAMES   = 600   # ~20 s at 30 fps
# Target faces are re-detected whenever frame_idx % DET_INTERVAL == 0;
# a value of 1 therefore disables reuse of cached detections entirely.
DET_INTERVAL = 1     # detect faces every frame — caching causes flicker when face moves


class VideoProcessor:
    def __init__(
        self,
        face_swapper=None,
        body_swapper=None,
    ):
        self.face_swapper = face_swapper
        self.body_swapper = body_swapper

    # ── Public API ────────────────────────────────────────────────────────────

    def process_video(
        self,
        source_bgr: np.ndarray,
        video_path: str,
        mode: str = "face",          # "face" | "body"
        enhance: bool = False,
        blend_strength: float = 0.85,
        fast_mode: bool = False,     # skip every other frame (~2x speed)
        start_frame: int = 0,        # resume from this frame index
        progress=None,
    ) -> tuple[str | None, str]:
        """
        Process the frames of *video_path*, applying the selected swap mode.

        Set *start_frame* > 0 to resume after a dropped connection.  Frames
        written before an interruption inside the loop are kept and encoded,
        so a partial result is still returned.

        Args:
            source_bgr: Source image handed to the swapper (presumably BGR,
                as the name suggests — confirm against the swapper classes).
            video_path: Path of the video to process.
            mode: ``"face"`` uses ``self.face_swapper``; ``"body"`` uses
                ``self.body_swapper``.
            enhance: Forwarded to ``face_swapper.swap_frame``.
            blend_strength: Forwarded to ``body_swapper.swap``.
            fast_mode: In face mode, every second frame is written through
                unprocessed.
            start_frame: Index of the first frame to process; clamped to the
                valid range reported by the container.
            progress: Optional callable invoked as ``progress(fraction, text)``
                before each frame is handled.

        Returns:
            ``(output_path, status_message)``.  ``output_path`` is ``None``
            when nothing could be produced.
        """
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None, "Could not open video file."

        fps          = cap.get(cv2.CAP_PROP_FPS) or 25.0
        width        = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height       = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Clamp start_frame
        start_frame = max(0, min(start_frame, total_frames - 1))
        remaining   = total_frames - start_frame

        if remaining > MAX_FRAMES:
            cap.release()
            return None, (
                f"Segment starting at frame {start_frame} has {remaining} frames — "
                f"maximum allowed is {MAX_FRAMES} (~{MAX_FRAMES / fps:.0f} s at {fps:.0f} fps). "
                "Increase the start frame or trim the video."
            )

        # ── Pre-compute source face once (big win for face-swap mode) ─────────
        source_face = None
        if mode == "face" and self.face_swapper:
            try:
                source_face = self.face_swapper.get_source_face(source_bgr)
            except Exception:
                # Do not leak the capture handle if detection itself blows up.
                cap.release()
                raise
            if source_face is None:
                cap.release()
                return None, "No face detected in source image."

        # ── Seek to start_frame — cut the tail with FFmpeg, else slow seek ────
        # NOTE(review): a stream-copy cut can only start on a keyframe, so the
        # segment may begin slightly before start_frame — confirm acceptable.
        segment_path = None
        if start_frame > 0:
            start_time = start_frame / fps
            segment_path = self._new_temp_path("_segment.mp4")
            try:
                import ffmpeg as _ffmpeg
                (
                    _ffmpeg.input(video_path, ss=start_time)
                    .output(segment_path, c="copy", avoid_negative_ts="make_zero")
                    .overwrite_output()
                    .run(quiet=True)
                )
                segment_cap = cv2.VideoCapture(segment_path)
                if not segment_cap.isOpened():
                    segment_cap.release()
                    raise RuntimeError("cut segment could not be opened")
                # Only now is it safe to drop the original capture.
                cap.release()
                cap = segment_cap
                print(f"[VideoProcessor] Resumed via FFmpeg cut at frame {start_frame} ({start_time:.2f}s)")
            except Exception as e:
                print(f"[VideoProcessor] FFmpeg seek failed ({e}), falling back to slow seek")
                self._remove_quietly(segment_path)
                segment_path = None
                cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        # Use AVI + XVID for the intermediate file — far more reliable than
        # mp4v on Linux (HF Spaces).  FFmpeg converts it to H.264/mp4 after.
        # XVID/MJPG require even dimensions — round down if necessary.
        enc_w = width  - (width  % 2)
        enc_h = height - (height % 2)
        writer = None
        raw_out_path = None
        for codec in ("XVID", "MJPG"):   # MJPG is the fallback when XVID is unavailable
            raw_out_path = self._new_temp_path("_raw.avi")
            writer = cv2.VideoWriter(
                raw_out_path, cv2.VideoWriter_fourcc(*codec), fps, (enc_w, enc_h)
            )
            if writer.isOpened():
                break
            writer.release()
            self._remove_quietly(raw_out_path)
        else:
            # Neither codec could be opened: fail loudly instead of silently
            # "writing" every frame into a dead writer.
            cap.release()
            if segment_path:
                self._remove_quietly(segment_path)
            return None, "Could not create the intermediate video writer."

        frame_idx        = start_frame   # absolute frame number in the source video
        processed        = 0
        errors           = 0
        cached_tgt_faces = None
        hit_cap          = False

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Enforce the hard cap even when the container's frame count
                # was missing or too small to trigger the up-front check.
                if frame_idx - start_frame >= MAX_FRAMES:
                    hit_cap = True
                    break

                if progress is not None and total_frames > 0:
                    progress(
                        min(1.0, (frame_idx - start_frame) / remaining),
                        f"Frame {frame_idx + 1} / {total_frames}  "
                        f"(resume at {frame_idx} if interrupted)",
                    )

                # Fast mode: skip odd frames — write the ORIGINAL frame (not a
                # duplicate) so motion stays smooth with no stutter or blur.
                # Only applies to face swap; body swap needs every frame.
                if fast_mode and mode == "face" and (frame_idx - start_frame) % 2 == 1:
                    # The writer was opened with even dimensions, so the raw
                    # frame must be fitted exactly like every other frame.
                    writer.write(self._fit_frame(frame, enc_w, enc_h, crop=True))
                    frame_idx += 1
                    continue

                # Only re-detect target faces every DET_INTERVAL frames
                use_cache = (
                    mode == "face"
                    and frame_idx % DET_INTERVAL != 0
                    and cached_tgt_faces is not None
                )

                result_frame, new_faces = self._process_frame(
                    source_bgr, frame, mode, enhance, blend_strength,
                    source_face=source_face,
                    cached_target_faces=cached_tgt_faces if use_cache else None,
                )

                # Keep the previous detections when this frame found none.
                if mode == "face" and new_faces:
                    cached_tgt_faces = new_faces

                if result_frame is not None:
                    writer.write(self._fit_frame(result_frame, enc_w, enc_h))
                    processed += 1
                else:
                    # Swap failed or was unavailable: pass the source frame through.
                    writer.write(self._fit_frame(frame, enc_w, enc_h, crop=True))
                    errors += 1

                frame_idx += 1

        except Exception as loop_err:
            # Deliberately best-effort: whatever was written so far is kept.
            print(f"[VideoProcessor] Loop interrupted at frame {frame_idx}: {loop_err}")

        finally:
            cap.release()
            writer.release()
            if segment_path:
                self._remove_quietly(segment_path)

        frames_done = frame_idx - start_frame
        if frames_done == 0:
            self._remove_quietly(raw_out_path)
            return None, f"No frames processed. Try resuming from frame {start_frame}."

        # Re-encode with H.264 and merge original audio via FFmpeg
        # Pass start_time so audio lines up with the resumed segment
        start_time = start_frame / fps
        final_path = self._ffmpeg_encode(video_path, raw_out_path, audio_start=start_time)

        # _ffmpeg_encode hands back the raw file itself when encoding fails;
        # deleting it then would return a path to a file that no longer exists.
        if final_path != raw_out_path:
            self._remove_quietly(raw_out_path)

        partial = hit_cap or frames_done < remaining
        status = (
            f"{'Partial — ' if partial else ''}Frames {start_frame}–{frame_idx - 1} "
            f"({processed} swapped{', ' + str(errors) + ' skipped' if errors else ''}). "
            + (f"Resume from frame {frame_idx} to continue." if partial else "Done.")
        )
        return final_path, status

    @staticmethod
    def _new_temp_path(suffix: str) -> str:
        """Create an empty temporary file and return its path (replaces the deprecated mktemp)."""
        fd, path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        return path

    @staticmethod
    def _remove_quietly(path: str) -> None:
        """Delete *path*, ignoring the case where it is already gone or not removable."""
        try:
            os.unlink(path)
        except OSError:
            pass

    @staticmethod
    def _fit_frame(frame, enc_w: int, enc_h: int, crop: bool = False):
        """Return *frame* sized exactly (enc_h, enc_w); optionally crop overhang before resizing."""
        if crop and (frame.shape[1] > enc_w or frame.shape[0] > enc_h):
            frame = frame[:enc_h, :enc_w]
        if frame.shape[1] != enc_w or frame.shape[0] != enc_h:
            frame = cv2.resize(frame, (enc_w, enc_h), interpolation=cv2.INTER_LINEAR)
        return frame

    # ── Internal helpers ──────────────────────────────────────────────────────

    def _process_frame(
        self,
        source_bgr: np.ndarray,
        frame: np.ndarray,
        mode: str,
        enhance: bool,
        blend_strength: float,
        source_face=None,
        cached_target_faces=None,
    ):
        """Returns (result_frame_or_None, detected_faces_or_None)."""
        try:
            if mode == "face" and self.face_swapper:
                result, faces = self.face_swapper.swap_frame(
                    frame,
                    source_face,
                    cached_target_faces=cached_target_faces,
                    enhance=enhance,
                )
                return result, faces
            elif mode == "body" and self.body_swapper:
                result, _ = self.body_swapper.swap(
                    source_bgr, frame, blend_strength=blend_strength
                )
                return result, None
        except Exception as e:
            print(f"[VideoProcessor] Frame error: {e}")
        return None, None

    @staticmethod
    def _ffmpeg_encode(original_video_path: str, processed_raw_path: str, audio_start: float = 0.0) -> str:
        """
        Re-encode processed frames as H.264 mp4 and merge the original audio.
        audio_start: seconds into the original audio (for resumed segments).
        Returns the output path; raises if encoding fails so caller can report it.
        """
        final_path = tempfile.mktemp(suffix="_output.mp4")
        try:
            import ffmpeg
            import subprocess

            video_in = ffmpeg.input(processed_raw_path)
            audio_in = ffmpeg.input(original_video_path)

            # Build output streams
            streams = [video_in.video]
            # Only attach audio if the source has an audio track
            try:
                probe = ffmpeg.probe(original_video_path)
                has_audio = any(s["codec_type"] == "audio" for s in probe["streams"])
            except Exception:
                has_audio = False

            if has_audio:
                if audio_start > 0:
                    audio_in = ffmpeg.input(original_video_path, ss=audio_start)
                streams.append(audio_in.audio)

            out_kwargs = dict(
                vcodec="libx264",
                crf=18,
                preset="fast",
                pix_fmt="yuv420p",
                **{"vf": "unsharp=3:3:0.3:3:3:0.0"},  # subtle luma sharpening, no ringing
            )
            if has_audio:
                out_kwargs.update(acodec="aac", audio_bitrate="192k")

            (
                ffmpeg.output(*streams, final_path, **out_kwargs)
                .overwrite_output()
                .run(quiet=False, capture_stdout=True, capture_stderr=True)
            )

            # Validate output
            if not os.path.exists(final_path) or os.path.getsize(final_path) < 1024:
                raise RuntimeError("FFmpeg produced an empty output file.")

            return final_path

        except ffmpeg.Error as e:
            stderr = e.stderr.decode(errors="replace") if e.stderr else ""
            print(f"[VideoProcessor] FFmpeg error:\n{stderr}")
            # Return the raw file as fallback so the user gets something
            return processed_raw_path
        except Exception as e:
            print(f"[VideoProcessor] FFmpeg encode failed: {e}")
            return processed_raw_path