# File size: 14,857 Bytes
# 359c2e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import cv2
import numpy as np
import mediapipe as mp
from PIL import Image
import os
import tempfile
from pathlib import Path
import torch
import torch.nn.functional as F
from facenet_pytorch import MTCNN
from utils import *

class VideoCharacterReplacer:
    """Replace the dominant face in a video with a face from a reference image.

    Detection combines MediaPipe (always) with MTCNN (only at high
    sensitivity); dense landmarks come from MediaPipe FaceMesh; the actual
    pixel blending is delegated to FaceSwapper.
    """

    def __init__(self):
        """Initialize the face-detection, landmark, and face-swap models."""
        self.mp_face_detection = mp.solutions.face_detection
        self.mp_drawing = mp.solutions.drawing_utils
        self.mp_face_mesh = mp.solutions.face_mesh
        self.face_detection = self.mp_face_detection.FaceDetection(
            model_selection=0, min_detection_confidence=0.5
        )
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=1,
            refine_landmarks=True
        )

        # MTCNN for more robust face detection.
        # BUG FIX: the keyword argument is `post_process`, not `post`;
        # the old spelling raised TypeError at construction time.
        self.mtcnn = MTCNN(
            image_size=224,
            margin=20,
            min_face_size=100,
            thresholds=[0.6, 0.7, 0.7],
            factor=0.709,
            post_process=True
        )

        # Pixel-level blending of the reference face into each frame.
        self.face_swapper = FaceSwapper()

    def replace_character(self, ref_image_path, input_video_path,
                         replacement_strength=0.8, detection_sensitivity=0.6,
                         tracking_stability=0.7, preserve_background=True):
        """
        Replace character in video with reference image.

        Args:
            ref_image_path (str): Path to reference image
            input_video_path (str): Path to input video
            replacement_strength (float): Strength of replacement (0-1)
            detection_sensitivity (float): Detection sensitivity (0-1)
            tracking_stability (float): Tracking stability (0-1)
            preserve_background (bool): Whether to preserve background

        Returns:
            str | None: Path to output video, or None on failure
        """
        cap = None
        out = None
        try:
            # cv2.imread returns None (it does not raise) when the path is bad.
            ref_image = cv2.imread(ref_image_path)
            if ref_image is None:
                raise IOError(f"Could not read reference image: {ref_image_path}")
            ref_image_rgb = cv2.cvtColor(ref_image, cv2.COLOR_BGR2RGB)

            cap = cv2.VideoCapture(input_video_path)
            if not cap.isOpened():
                raise IOError(f"Could not open video: {input_video_path}")

            # Video properties; fall back to 30 fps when the container
            # reports 0 (happens with some codecs/streams).
            fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # mkstemp instead of the insecure, deprecated mktemp: the file
            # is created atomically so no other process can race us for it.
            fd, output_path = tempfile.mkstemp(suffix='.mp4')
            os.close(fd)
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

            prev_face_landmarks = None
            frame_count = 0

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                faces = self.detect_faces(frame_rgb, detection_sensitivity)

                processed_frame = frame_rgb  # default: pass the frame through
                if faces:
                    # Most confident face first (detect_faces sorts).
                    face = faces[0]
                    landmarks = self.get_face_landmarks(frame_rgb, face)

                    # BUG FIX: `if landmarks:` on a multi-element ndarray
                    # raises "truth value is ambiguous"; test identity instead.
                    if landmarks is not None:
                        # Smooth against the previous frame to reduce jitter.
                        if prev_face_landmarks is not None and tracking_stability > 0.5:
                            landmarks = self.apply_temporal_consistency(
                                landmarks, prev_face_landmarks, tracking_stability
                            )

                        processed_frame = self.face_swapper.replace_face(
                            frame_rgb,
                            ref_image_rgb,
                            landmarks,
                            replacement_strength,
                            preserve_background
                        )

                        prev_face_landmarks = landmarks.copy()

                # Convert back to BGR and write the frame.
                frame_bgr = cv2.cvtColor(processed_frame, cv2.COLOR_RGB2BGR)
                out.write(frame_bgr)

            return output_path

        except Exception as e:
            print(f"Error in video processing: {e}")
            return None
        finally:
            # Release capture/writer even when an exception aborts the loop.
            if cap is not None:
                cap.release()
            if out is not None:
                out.release()

    def detect_faces(self, image, sensitivity=0.6):
        """
        Detect faces using MediaPipe, plus MTCNN when sensitivity is high.

        Args:
            image (numpy.ndarray): Input image in RGB format
            sensitivity (float): Detection sensitivity (0-1); above 0.7 the
                slower MTCNN detector runs as well

        Returns:
            list: Face dicts with 'bbox' (x, y, w, h), 'confidence', and
                'method', sorted by confidence with overlapping duplicates
                removed
        """
        faces = []

        # MediaPipe detection: relative coordinates -> pixel (x, y, w, h).
        results = self.face_detection.process(image)
        if results.detections:
            ih, iw, _ = image.shape
            for detection in results.detections:
                bboxC = detection.location_data.relative_bounding_box
                bbox = (int(bboxC.xmin * iw), int(bboxC.ymin * ih),
                        int(bboxC.width * iw), int(bboxC.height * ih))
                faces.append({
                    'bbox': bbox,
                    'confidence': detection.score[0],
                    'method': 'mediapipe'
                })

        # MTCNN for additional detection if sensitivity is high.
        if sensitivity > 0.7:
            try:
                boxes, probs = self.mtcnn.detect(image)
                if boxes is not None:
                    for box, prob in zip(boxes, probs):
                        if prob > 0.9:
                            # BUG FIX: MTCNN returns corner boxes
                            # [x1, y1, x2, y2]; convert to (x, y, w, h) so
                            # every bbox downstream shares one format.
                            x1, y1, x2, y2 = (int(v) for v in box)
                            faces.append({
                                'bbox': (x1, y1, x2 - x1, y2 - y1),
                                'confidence': prob,
                                'method': 'mtcnn'
                            })
            except Exception as e:
                print(f"MTCNN detection error: {e}")

        # Highest confidence first, then drop overlapping duplicates.
        faces.sort(key=lambda f: f['confidence'], reverse=True)
        return self.remove_overlapping_faces(faces)

    def get_face_landmarks(self, image, face):
        """
        Extract dense facial landmarks, falling back to the bbox corners.

        Args:
            image (numpy.ndarray): Input image (RGB)
            face (dict): Face detection result with a pixel 'bbox' (x, y, w, h)

        Returns:
            numpy.ndarray: (N, 2) pixel landmark coordinates — FaceMesh's
                dense mesh when available, otherwise the four bbox corners
        """
        try:
            # MediaPipe FaceMesh yields normalized [0, 1] coordinates.
            results = self.face_mesh.process(image)
            if results.multi_face_landmarks:
                # BUG FIX: the original iterated `landmark.landmark`
                # (NameError); the detected object here is `landmarks`.
                landmarks = results.multi_face_landmarks[0]
                h, w = image.shape[:2]
                return np.array([[lm.x * w, lm.y * h]
                                 for lm in landmarks.landmark])
        except Exception as e:
            print(f"Landmark extraction error: {e}")

        # Fallback: the four corners of the detection bounding box.
        x, y, bw, bh = face['bbox']
        return np.array([
            [x, y],             # Top-left
            [x + bw, y],        # Top-right
            [x, y + bh],        # Bottom-left
            [x + bw, y + bh]    # Bottom-right
        ])

    def apply_temporal_consistency(self, current_landmarks, prev_landmarks, stability):
        """
        Apply temporal consistency to smooth landmark tracking.

        Args:
            current_landmarks (numpy.ndarray): Current frame landmarks
            prev_landmarks (numpy.ndarray): Previous frame landmarks
            stability (float): Stability factor (0-1); higher values weight
                the previous frame more heavily

        Returns:
            numpy.ndarray: Stabilized landmarks
        """
        # Exponential smoothing toward the previous frame.
        alpha = stability
        stabilized = alpha * prev_landmarks + (1 - alpha) * current_landmarks
        return stabilized

    def remove_overlapping_faces(self, faces, overlap_threshold=0.5):
        """
        Remove overlapping face detections, keeping the more confident one.

        Args:
            faces (list): Face detections with pixel 'bbox' (x, y, w, h)
            overlap_threshold (float): IoU threshold for overlap removal

        Returns:
            list: Non-overlapping face detections
        """
        if len(faces) <= 1:
            return faces

        non_overlapping = []
        for i, face1 in enumerate(faces):
            bbox1 = face1['bbox']
            keep = True

            for j, face2 in enumerate(faces):
                if i != j:
                    bbox2 = face2['bbox']
                    # Intersection-over-union of the two (x, y, w, h) boxes.
                    x1 = max(bbox1[0], bbox2[0])
                    y1 = max(bbox1[1], bbox2[1])
                    x2 = min(bbox1[0] + bbox1[2], bbox2[0] + bbox2[2])
                    y2 = min(bbox1[1] + bbox1[3], bbox2[1] + bbox2[3])

                    if x2 > x1 and y2 > y1:
                        intersection = (x2 - x1) * (y2 - y1)
                        union = (bbox1[2] * bbox1[3]) + (bbox2[2] * bbox2[3]) - intersection
                        iou = intersection / union if union > 0 else 0

                        if iou > overlap_threshold:
                            # Keep the face with higher confidence.
                            if face2['confidence'] > face1['confidence']:
                                keep = False
                                break

            if keep:
                non_overlapping.append(face1)

        return non_overlapping

class FaceSwapper:
    """Blend a source face into a target image using a landmark-based mask."""

    def __init__(self):
        """Initialize face swapping functionality."""
        # Haar cascade loaded here but not used by the methods below;
        # presumably kept for fallback detection — TODO confirm with callers.
        self.face_analyzer = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    def _face_bbox(self, image, landmarks, padding=20):
        """Padded landmark bounding box clipped to the image: (x0, y0, x1, y1)."""
        x0 = max(0, int(np.min(landmarks[:, 0])) - padding)
        x1 = min(image.shape[1], int(np.max(landmarks[:, 0])) + padding)
        y0 = max(0, int(np.min(landmarks[:, 1])) - padding)
        y1 = min(image.shape[0], int(np.max(landmarks[:, 1])) + padding)
        return x0, y0, x1, y1

    def replace_face(self, target_image, source_image, landmarks,
                    replacement_strength=0.8, preserve_background=True):
        """
        Replace face in target image with face from source image.

        Args:
            target_image (numpy.ndarray): Target image (RGB, uint8)
            source_image (numpy.ndarray): Source image with replacement face
            landmarks (numpy.ndarray): (N, 2) facial landmarks in target coords
            replacement_strength (float): Replacement strength (0-1)
            preserve_background (bool): When True, color-match the source face
                to the target before blending

        Returns:
            numpy.ndarray: Image with replaced face (the unmodified target on
                any internal failure)
        """
        try:
            # Soft mask over the face's convex hull (full image size).
            mask = self.create_face_mask(target_image, landmarks)

            # Crop both faces; the source crop is resized to the target crop
            # so the arrays are shape-compatible for blending.
            x0, y0, x1, y1 = self._face_bbox(target_image, landmarks)
            if x1 <= x0 or y1 <= y0:
                return target_image
            target_face = target_image[y0:y1, x0:x1]
            source_face = self.extract_face_region(source_image, landmarks)
            if source_face.size == 0:
                return target_image
            # BUG FIX: the original blended the small source crop directly
            # against the full-size target, which can never broadcast; the
            # crop must first be resized and placed at the target location.
            source_face = cv2.resize(source_face, (x1 - x0, y1 - y0))

            if preserve_background:
                source_face = self.match_color_statistics(source_face, target_face)

            # Paste the source face into a full-size canvas, then alpha-blend.
            # BUG FIX: the old per-channel formula's weights did not sum to 1
            # (background pixels were brightened by up to 2-strength); this
            # convex blend leaves pixels outside the mask untouched.
            source_full = target_image.astype(np.float32).copy()
            source_full[y0:y1, x0:x1] = source_face
            alpha = (replacement_strength * mask)[:, :, None]
            result = (1.0 - alpha) * target_image.astype(np.float32) + alpha * source_full

            return np.clip(result, 0, 255).astype(np.uint8)

        except Exception as e:
            print(f"Face replacement error: {e}")
            return target_image

    def create_face_mask(self, image, landmarks):
        """
        Create a soft mask for the face region.

        Args:
            image (numpy.ndarray): Input image (used only for its shape)
            landmarks (numpy.ndarray): (N, 2) facial landmarks

        Returns:
            numpy.ndarray: float32 mask in [0, 1], same height/width as image
        """
        mask = np.zeros(image.shape[:2], dtype=np.float32)

        # Fill the convex hull of the landmarks, then feather the edges.
        hull = cv2.convexHull(landmarks.astype(np.int32))
        cv2.fillPoly(mask, [hull], 1.0)
        mask = cv2.GaussianBlur(mask, (15, 15), 0)

        return mask

    def extract_face_region(self, image, landmarks):
        """
        Extract the padded face region from an image based on landmarks.

        Args:
            image (numpy.ndarray): Input image
            landmarks (numpy.ndarray): (N, 2) facial landmarks

        Returns:
            numpy.ndarray: Cropped face region (landmark bbox + 20px padding,
                clipped to the image bounds)
        """
        x0, y0, x1, y1 = self._face_bbox(image, landmarks)
        return image[y0:y1, x0:x1]

    def match_color_statistics(self, source, target):
        """
        Match per-channel mean/std of the source face to the target face.

        Args:
            source (numpy.ndarray): Source face (uint8)
            target (numpy.ndarray): Target face (may differ in size; only its
                channel statistics are used)

        Returns:
            numpy.ndarray: Color-matched source face (uint8)
        """
        result = source.copy().astype(np.float32)

        for i in range(3):  # For each color channel
            source_mean = np.mean(source[:, :, i])
            source_std = np.std(source[:, :, i])
            target_mean = np.mean(target[:, :, i])
            target_std = np.std(target[:, :, i])

            # A constant channel (std == 0) is left unchanged to avoid
            # division by zero.
            if source_std > 0:
                result[:, :, i] = (source[:, :, i] - source_mean) * (target_std / source_std) + target_mean

        return np.clip(result, 0, 255).astype(np.uint8)