"""Face swapping utilities built on insightface (with ONNX Runtime) and OpenCV."""

import concurrent.futures
import os
import threading
import time
from typing import List, Optional, Tuple

import cv2
import numpy as np

# Try to import insightface, but provide fallback if not available
try:
    import insightface
    from insightface.app import FaceAnalysis
    INSIGHTFACE_AVAILABLE = True
except ImportError as e:
    print(f"Warning: insightface not available: {e}")
    print("Using OpenCV fallback for face detection")
    INSIGHTFACE_AVAILABLE = False
    insightface = None
    FaceAnalysis = None


class FaceSwapper:
    """Detect faces with insightface and swap them between two images.

    When insightface cannot be imported, ``self.app`` and ``self.swapper`` are
    ``None``; only ``count_faces`` (Haar cascade) remains usable and the swap
    methods raise ``RuntimeError``.
    """

    def __init__(self, gpu_enabled=True, gpu_id=0):
        """
        Initialize FaceSwapper with GPU acceleration support

        Args:
            gpu_enabled: Whether to use GPU acceleration
            gpu_id: GPU device ID (default 0)
        """
        self.gpu_enabled = gpu_enabled
        self.gpu_id = gpu_id
        self.ctx_id = gpu_id if gpu_enabled else -1
        # Larger detection size = more accurate but more VRAM usage.
        self.det_size = (1024, 1024) if gpu_enabled else (640, 640)

        print(f"Initializing FaceSwapper with GPU {'enabled' if gpu_enabled else 'disabled'} (ctx_id={self.ctx_id})")

        if INSIGHTFACE_AVAILABLE:
            # Detection + landmark models.
            self.app = FaceAnalysis(name='buffalo_l')
            self.app.prepare(ctx_id=self.ctx_id, det_size=self.det_size)

            # Face swapping model.
            self.swapper = insightface.model_zoo.get_model(
                'inswapper_128.onnx',
                download=True,
                download_zip=True
            )
        else:
            print("Using OpenCV fallback mode - limited functionality")
            self.app = None
            self.swapper = None

        # Configure the swapper's ONNX session for GPU if available.
        self.gpu_error = None
        if INSIGHTFACE_AVAILABLE and gpu_enabled and hasattr(self.swapper, 'session'):
            try:
                import onnxruntime as ort

                # Prefer DirectML (AMD GPUs), fall back to CPU. Only request
                # providers this onnxruntime build actually offers so that
                # set_providers() is never handed an unknown provider name.
                preferred = ['DmlExecutionProvider', 'CPUExecutionProvider']
                available = ort.get_available_providers()
                providers = [p for p in preferred if p in available] or ['CPUExecutionProvider']
                self.swapper.session.set_providers(providers)
                actual_providers = self.swapper.session.get_providers()
                print(f"GPU providers configured: {actual_providers}")

                # Check if DirectML is actually being used
                if 'DmlExecutionProvider' in actual_providers:
                    print("✅ GPU acceleration successfully enabled with DirectML (AMD RX 5500 XT)")
                else:
                    print("⚠️ DirectML provider not available, falling back to CPU")
                    self.gpu_enabled = False
                    self.ctx_id = -1
                    self.gpu_error = "DirectML provider not available"
            except Exception as e:
                print(f"❌ GPU configuration failed, falling back to CPU: {e}")
                self.gpu_enabled = False
                self.ctx_id = -1
                self.gpu_error = str(e)
        else:
            if not INSIGHTFACE_AVAILABLE:
                self.gpu_error = "insightface not available - using OpenCV fallback"
            elif not gpu_enabled:
                self.gpu_error = "GPU acceleration disabled by user"
            else:
                self.gpu_error = "Swapper session not available for GPU configuration"

        # Performance tracking
        self.last_processing_time = 0
        self.gpu_memory_usage = 0

    def get_gpu_info(self):
        """Return a dict describing the current GPU / execution-provider state.

        The dict always contains ``gpu_enabled`` and ``insightface_available``;
        the remaining keys depend on whether GPU acceleration is active and
        whether querying onnxruntime succeeded.
        """
        if not self.gpu_enabled or not INSIGHTFACE_AVAILABLE:
            return {
                "gpu_enabled": False,
                "message": "GPU acceleration disabled or not available",
                "error": getattr(self, 'gpu_error', 'Unknown error'),
                "ctx_id": self.ctx_id,
                "insightface_available": INSIGHTFACE_AVAILABLE
            }

        try:
            import onnxruntime as ort
            providers = ort.get_available_providers()
            current_providers = getattr(self.swapper.session, 'get_providers', lambda: ['Unknown'])()

            return {
                "gpu_enabled": True,
                "gpu_id": self.gpu_id,
                "available_providers": providers,
                "current_providers": current_providers,
                "ctx_id": self.ctx_id,
                "directml_available": 'DmlExecutionProvider' in current_providers,
                "detection_size": getattr(self, 'det_size', (1024, 1024)),
                "insightface_available": INSIGHTFACE_AVAILABLE
            }
        except Exception as e:
            return {
                "gpu_enabled": False,
                "error": str(e),
                "fallback_reason": "GPU info retrieval failed",
                "insightface_available": INSIGHTFACE_AVAILABLE
            }

    def transplant_hair(self, src_img, dst_img, src_face, dst_face):
        """
        Warps the source hair onto the destination face using Affine Transformation.

        Args:
            src_img: Source image; ``src_face.kps`` must be expressed in this
                image's pixel coordinates.
            dst_img: Destination image the hair region is blended into.
            src_face: Face object exposing ``kps`` (landmarks) for the source.
            dst_face: Face object exposing ``kps`` (landmarks) for the destination.

        Returns:
            A uint8 image with the same height/width as ``dst_img``.
        """
        # 1. Get Landmarks (keypoints)
        src_lm = src_face.kps
        dst_lm = dst_face.kps

        # 2. Affine transform that maps the first three source landmarks
        #    (treated here as eyes and nose) onto the destination's.
        src_pts = src_lm[:3]
        dst_pts = dst_lm[:3]
        M = cv2.getAffineTransform(src_pts.astype(np.float32), dst_pts.astype(np.float32))

        # 3. Warp the entire Source Image to match Target Geometry
        h, w = dst_img.shape[:2]
        warped_src = cv2.warpAffine(src_img, M, (w, h), borderMode=cv2.BORDER_REFLECT)

        # 4. Estimate the hair region from the landmarks: everything a fixed
        #    fraction of the eye-to-nose distance above the eye line.
        eye_y = int((dst_lm[0][1] + dst_lm[1][1]) / 2)  # Average eye height
        nose_y = int(dst_lm[2][1])
        face_height = nose_y - eye_y

        hair_mask = np.zeros((h, w, 3), dtype=np.float32)

        # Start the mask slightly above the eyes
        forehead_line = int(eye_y - (face_height * 0.8))

        # Fill from the top of the image down to the forehead line; if the
        # line is off the top of the image the mask stays empty.
        if forehead_line > 0:
            cv2.rectangle(hair_mask, (0, 0), (w, forehead_line), (1, 1, 1), -1)

        # Blur the mask heavily to blend the hairline
        hair_mask = cv2.GaussianBlur(hair_mask, (51, 51), 0)

        # 5. Blend: (WarpedSource * Mask) + (Target * (1-Mask))
        dst_float = dst_img.astype(np.float32) / 255.0
        src_float = warped_src.astype(np.float32) / 255.0

        final = (src_float * hair_mask) + (dst_float * (1.0 - hair_mask))
        final = np.clip(final * 255.0, 0, 255).astype(np.uint8)

        return final

    def enhance_face_alignment(self, source_img, target_img, source_face, target_face):
        """
        Warp the source image so its face landmarks line up with the target's.

        Returns the warped source (sized like ``target_img``), or the
        unmodified ``source_img`` when no transform could be estimated or an
        error occurred.

        Note: the returned image is in target geometry, so ``source_face.kps``
        no longer describes landmark positions in it.
        """
        try:
            src_pts = np.array(source_face.kps, dtype=np.float32)
            dst_pts = np.array(target_face.kps, dtype=np.float32)

            # Similarity transform (rotation, uniform scale, translation)
            # estimated from the first three landmarks.
            h, w = target_img.shape[:2]
            M = cv2.estimateAffinePartial2D(src_pts[:3], dst_pts[:3])[0]

            if M is not None:
                aligned_source = cv2.warpAffine(source_img, M, (w, h), borderMode=cv2.BORDER_REFLECT_101)
                return aligned_source
            else:
                return source_img

        except Exception as e:
            print(f"Face alignment enhancement failed: {e}")
            return source_img

    def improve_color_matching(self, swapped_face, target_region, target_face_bbox):
        """
        Match the colours of ``swapped_face`` to ``target_region``.

        Each LAB channel of the swapped face is histogram-matched to the
        target region, and the result is blended 70/30 with the original
        swapped face. ``target_face_bbox`` is accepted for interface
        compatibility and is not used.

        Returns the colour-matched face, or ``swapped_face`` unchanged if any
        step fails.
        """
        try:
            # Convert to LAB color space for better color separation
            swapped_lab = cv2.cvtColor(swapped_face, cv2.COLOR_BGR2LAB)
            target_lab = cv2.cvtColor(target_region, cv2.COLOR_BGR2LAB)

            # Apply histogram matching for each channel
            for i in range(3):  # L, A, B channels
                swapped_hist = cv2.calcHist([swapped_lab], [i], None, [256], [0, 256])
                target_hist = cv2.calcHist([target_lab], [i], None, [256], [0, 256])

                # Normalize histograms
                swapped_hist = swapped_hist / swapped_hist.sum()
                target_hist = target_hist / target_hist.sum()

                # Create lookup table for histogram matching
                lut = self._create_histogram_lut(swapped_hist, target_hist)
                swapped_lab[:, :, i] = cv2.LUT(swapped_lab[:, :, i], lut)

            # Convert back to BGR
            enhanced_face = cv2.cvtColor(swapped_lab, cv2.COLOR_LAB2BGR)

            # Blend with original to maintain natural look
            alpha = 0.7  # 70% enhanced, 30% original
            final_face = cv2.addWeighted(enhanced_face, alpha, swapped_face, 1 - alpha, 0)

            return final_face

        except Exception as e:
            print(f"Color matching enhancement failed: {e}")
            return swapped_face

    def _create_histogram_lut(self, source_hist, target_hist):
        """
        Create a 256-entry uint8 lookup table for histogram matching.

        For every source intensity, the table holds the target intensity
        whose cumulative frequency is closest to the source's (the lowest
        such intensity on ties).
        """
        source_cdf = np.asarray(source_hist).cumsum()
        target_cdf = np.asarray(target_hist).cumsum()

        # Row i holds |target_cdf - source_cdf[i]|; argmin picks the first
        # closest target bin, exactly like a per-intensity loop would.
        distances = np.abs(target_cdf[np.newaxis, :] - source_cdf[:, np.newaxis])
        return distances.argmin(axis=1).astype(np.uint8)

    @staticmethod
    def _clip_bbox(bbox, width, height):
        """Convert a bbox to ints and clamp it to a ``width`` x ``height`` image."""
        x1, y1, x2, y2 = (int(v) for v in bbox)
        x1 = min(max(x1, 0), width)
        x2 = min(max(x2, 0), width)
        y1 = min(max(y1, 0), height)
        y2 = min(max(y2, 0), height)
        return x1, y1, x2, y2

    def seamless_blending(self, swapped_face, target_img, target_face_bbox):
        """
        Blend ``swapped_face`` into ``target_img`` at ``target_face_bbox``.

        Uses Laplacian-pyramid (multi-band) blending under a blurred
        elliptical mask. The bbox is clamped to the image; if the clamped
        region is empty a copy of ``target_img`` is returned. If blending
        fails, the face is pasted without blending.

        Args:
            swapped_face: Face crop for the bbox region (resized to the
                clamped region if its size differs).
            target_img: Full target image.
            target_face_bbox: ``(x1, y1, x2, y2)`` in target pixel coordinates.

        Returns:
            A uint8 copy of ``target_img`` with the face region replaced.
        """
        img_h, img_w = target_img.shape[:2]
        raw_x1, raw_y1, raw_x2, raw_y2 = map(int, target_face_bbox)
        x1, y1, x2, y2 = self._clip_bbox(target_face_bbox, img_w, img_h)
        region_w, region_h = x2 - x1, y2 - y1

        # Nothing to blend when the face lies entirely outside the frame.
        if region_w <= 0 or region_h <= 0 or swapped_face is None or swapped_face.size == 0:
            return target_img.copy()

        # Keep the crop and the destination region the same size.
        if swapped_face.shape[:2] != (region_h, region_w):
            swapped_face = cv2.resize(swapped_face, (region_w, region_h))

        try:
            # Elliptical mask from the unclamped bbox so the ellipse stays
            # centred on the face even when the bbox leaves the frame.
            mask = np.zeros((img_h, img_w), dtype=np.uint8)
            center = (int((raw_x1 + raw_x2) / 2), int((raw_y1 + raw_y2) / 2))
            size = (int((raw_x2 - raw_x1) / 2), int((raw_y2 - raw_y1) / 2))
            cv2.ellipse(mask, center, size, 0, 0, 360, (255, 255, 255), -1)

            # Apply Gaussian blur to mask for smooth edges
            mask_blurred = cv2.GaussianBlur(mask, (101, 101), 0).astype(np.float32) / 255.0
            region_mask = mask_blurred[y1:y2, x1:x2]

            face_f = swapped_face.astype(np.float32)
            region_f = target_img[y1:y2, x1:x2].astype(np.float32)

            # The mask must carry the same channel count as the images so
            # the per-level products broadcast.
            if face_f.ndim == 3:
                region_mask = np.repeat(region_mask[:, :, np.newaxis], face_f.shape[2], axis=2)

            levels = 5
            face_bands = self._create_laplacian_pyramid(face_f, levels)
            region_bands = self._create_laplacian_pyramid(region_f, levels)
            mask_levels = self._create_gaussian_pyramid(region_mask, levels)

            # Blend each frequency band with the matching mask level.
            blended_pyramid = [
                face_band * m + region_band * (1 - m)
                for face_band, region_band, m in zip(face_bands, region_bands, mask_levels)
            ]
            blended_face = self._reconstruct_from_pyramid(blended_pyramid)

            result = target_img.copy()
            # Clip before the cast: band blending can overshoot [0, 255].
            result[y1:y2, x1:x2] = np.clip(blended_face, 0, 255).astype(target_img.dtype)
            return result

        except Exception as e:
            print(f"Seamless blending failed: {e}")
            # Fallback to simple paste
            result = target_img.copy()
            result[y1:y2, x1:x2] = swapped_face
            return result

    @staticmethod
    def _upsample_to(img, shape):
        """pyrUp ``img`` and force its height/width to ``shape[:2]``."""
        up = cv2.pyrUp(img)
        if up.shape[:2] != tuple(shape[:2]):
            up = cv2.resize(up, (shape[1], shape[0]))
        return up

    def _create_gaussian_pyramid(self, img, levels):
        """
        Create a Gaussian pyramid: ``img`` followed by ``levels - 1``
        successive ``cv2.pyrDown`` reductions.
        """
        pyramid = [img]
        current = img

        for _ in range(levels - 1):
            current = cv2.pyrDown(current)
            pyramid.append(current)

        return pyramid

    def _create_laplacian_pyramid(self, img, levels):
        """
        Create a Laplacian pyramid of ``img`` with ``levels`` entries.

        Every entry but the last is a Gaussian level minus the upsampled next
        coarser level; the last entry is the coarsest Gaussian level.
        """
        gaussian = self._create_gaussian_pyramid(img, levels)
        laplacian = [
            fine - self._upsample_to(coarse, fine.shape)
            for fine, coarse in zip(gaussian[:-1], gaussian[1:])
        ]
        laplacian.append(gaussian[-1])
        return laplacian

    def _reconstruct_from_pyramid(self, pyramid):
        """
        Collapse a pyramid from coarsest to finest by repeatedly upsampling
        and adding the next finer level. Yields the original image when given
        a Laplacian pyramid.
        """
        result = pyramid[-1]

        for level in reversed(pyramid[:-1]):
            result = self._upsample_to(result, level.shape) + level

        return result

    @staticmethod
    def _read_image_pair(source_path, target_path):
        """Read both images, raising ``ValueError`` if either cannot be read."""
        source_img = cv2.imread(source_path)
        target_img = cv2.imread(target_path)

        if source_img is None or target_img is None:
            raise ValueError("Could not read one or both images")

        return source_img, target_img

    def _detect_sorted_faces(self, img):
        """Detect faces in ``img`` and return them sorted left to right."""
        if self.app is None:
            raise RuntimeError("Face detection requires insightface, which is not available")
        return sorted(self.app.get(img), key=lambda face: face.bbox[0])

    def swap_faces(self, source_path, source_face_idx, target_path, target_face_idx, swap_hair=False):
        """Swap one face from the source image onto one face of the target image.

        Args:
            source_path: Path of the image providing the face.
            source_face_idx: 1-based index (left to right) of the source face.
            target_path: Path of the image receiving the face.
            target_face_idx: 1-based index (left to right) of the target face.
            swap_hair: Also blend the source's hair region onto the result.

        Returns:
            The resulting image.

        Raises:
            ValueError: If an image cannot be read or a face index is out of range.
            RuntimeError: If insightface is not available.
        """
        start_time = time.perf_counter()

        source_img, target_img = self._read_image_pair(source_path, target_path)

        source_faces = self._detect_sorted_faces(source_img)
        target_faces = self._detect_sorted_faces(target_img)

        result = self._swap_single_face(
            source_img, target_img, source_faces, target_faces,
            source_face_idx, target_face_idx, swap_hair
        )

        self.last_processing_time = time.perf_counter() - start_time
        print(f"Face swap completed in {self.last_processing_time:.2f}s (GPU: {'Yes' if self.gpu_enabled else 'No'})")

        return result

    def swap_faces_batch(self, source_path: str, target_path: str,
                         source_face_indices: Optional[List[int]] = None,
                         target_face_indices: Optional[List[int]] = None,
                         swap_hair: bool = False) -> List[np.ndarray]:
        """
        Swap every requested source face onto every requested target face.

        Both index lists are 1-based and default to ``[1]``. Combinations
        that fail are reported and skipped, so the returned list may be
        shorter than the number of combinations.

        Raises:
            ValueError: If either image cannot be read.
            RuntimeError: If insightface is not available.
        """
        if source_face_indices is None:
            source_face_indices = [1]
        if target_face_indices is None:
            target_face_indices = [1]

        source_img, target_img = self._read_image_pair(source_path, target_path)

        # Detect all faces once
        print("Detecting faces in source and target images...")
        source_faces = self._detect_sorted_faces(source_img)
        target_faces = self._detect_sorted_faces(target_img)

        results = []

        # Process combinations in parallel if GPU is available
        if self.gpu_enabled and len(source_face_indices) * len(target_face_indices) > 1:
            print(f"Processing {len(source_face_indices)}x{len(target_face_indices)} combinations in parallel on GPU...")
            results = self._process_parallel_swaps(
                source_img, target_img, source_faces, target_faces,
                source_face_indices, target_face_indices, swap_hair
            )
        else:
            # Sequential processing for single combinations or CPU fallback
            print(f"Processing {len(source_face_indices)}x{len(target_face_indices)} combinations sequentially...")
            for s_idx in source_face_indices:
                for t_idx in target_face_indices:
                    try:
                        result = self._swap_single_face(
                            source_img, target_img, source_faces, target_faces,
                            s_idx, t_idx, swap_hair
                        )
                        results.append(result)
                    except Exception as e:
                        print(f"Failed to swap source face {s_idx} with target face {t_idx}: {e}")
                        continue

        return results

    def _process_parallel_swaps(self, source_img, target_img, source_faces, target_faces,
                                source_indices, target_indices, swap_hair):
        """Run every source x target swap on a thread pool.

        Returns the successful results in submission order; failed or
        timed-out combinations are reported and omitted.
        """
        combinations = [(s_idx, t_idx) for s_idx in source_indices for t_idx in target_indices]
        if not combinations:
            # ThreadPoolExecutor rejects max_workers=0.
            return []

        results = []

        def process_combination(s_idx, t_idx):
            try:
                # Each task works on its own copies of the images.
                return self._swap_single_face(
                    source_img.copy(), target_img.copy(),
                    source_faces, target_faces, s_idx, t_idx, swap_hair
                )
            except Exception as e:
                print(f"Parallel swap failed for {s_idx}x{t_idx}: {e}")
                return None

        max_workers = min(4, len(combinations))

        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                (executor.submit(process_combination, s_idx, t_idx), s_idx, t_idx)
                for s_idx, t_idx in combinations
            ]

            for future, s_idx, t_idx in futures:
                try:
                    result = future.result(timeout=30)  # 30 second timeout per swap
                    if result is not None:
                        results.append(result)
                        print(f"Completed swap: Source {s_idx} -> Target {t_idx}")
                except concurrent.futures.TimeoutError:
                    print(f"Timeout swapping source face {s_idx} with target face {t_idx}")
                except Exception as e:
                    print(f"Error in parallel processing {s_idx}x{t_idx}: {e}")

        return results

    def _swap_single_face(self, source_img, target_img, source_faces, target_faces,
                          source_idx, target_idx, swap_hair):
        """Swap one already-detected source face onto one target face.

        Indices are 1-based positions in the given face lists.

        Raises:
            ValueError: If an index is outside its face list.
            RuntimeError: If the swapper model is not available.
        """
        if not 1 <= source_idx <= len(source_faces):
            raise ValueError(f"Source image contains {len(source_faces)} faces, but requested face {source_idx}")
        if not 1 <= target_idx <= len(target_faces):
            raise ValueError(f"Target image contains {len(target_faces)} faces, but requested face {target_idx}")
        if self.swapper is None:
            raise RuntimeError("Face swapping requires insightface, which is not available")

        source_face = source_faces[source_idx - 1]
        target_face = target_faces[target_idx - 1]

        # Face swap
        result = self.swapper.get(target_img, target_face, source_face, paste_back=True)

        # Clamp the bbox: detector boxes can extend past the frame, and
        # negative values would otherwise act as from-the-end slice indices.
        img_h, img_w = target_img.shape[:2]
        x1, y1, x2, y2 = self._clip_bbox(target_face.bbox, img_w, img_h)

        if x2 > x1 and y2 > y1:
            swapped_face_region = result[y1:y2, x1:x2]
            target_face_region = target_img[y1:y2, x1:x2]

            enhanced_face = self.improve_color_matching(swapped_face_region, target_face_region, target_face.bbox)
            result = self.seamless_blending(enhanced_face, target_img, target_face.bbox)

        # Optional hair transplant. The original source image is used because
        # source_face.kps are measured in its coordinates.
        if swap_hair:
            try:
                result = self.transplant_hair(source_img, result, source_face, target_face)
            except Exception as e:
                # Best effort: keep the swap result if the hair step fails.
                print(f"Hair swap failed: {e}")

        return result

    def optimize_for_gpu_memory(self, max_faces_per_batch=4, total_vram_gb=8):
        """
        Suggest a batch size that fits in GPU memory.

        Args:
            max_faces_per_batch: Upper bound on the returned batch size.
            total_vram_gb: Total VRAM of the GPU in GB (default 8).

        Returns:
            ``max_faces_per_batch`` when the GPU is disabled; otherwise the
            smaller of ``max_faces_per_batch`` and the number of faces that
            fit in the VRAM left after a fixed safety margin.
        """
        if not self.gpu_enabled:
            return max_faces_per_batch

        vram_safety_margin = 2  # GB reserved for system
        estimated_vram_per_face = 0.5  # GB per high-res face processing

        available_vram = total_vram_gb - vram_safety_margin
        optimal_batch_size = min(max_faces_per_batch, int(available_vram / estimated_vram_per_face))

        print(f"GPU VRAM optimization: {available_vram}GB available, batch size: {optimal_batch_size}")
        return optimal_batch_size

    def count_faces(self, img_path):
        """
        Counts the number of faces in the given image file using OpenCV's
        frontal-face Haar cascade.

        Raises:
            ValueError: If the image cannot be read.
        """
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError(f"Could not read image: {img_path}")

        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        return len(faces)


def main():
    """Interactive CLI: swap the first source face onto a chosen target face."""
    # Paths relative to root
    source_path = os.path.join("SinglePhoto", "data_src.jpg")
    target_path = os.path.join("SinglePhoto", "data_dst.jpg")
    output_dir = os.path.join("SinglePhoto", "output")

    os.makedirs(output_dir, exist_ok=True)

    swapper = FaceSwapper()

    try:
        # Ask user for target_face_idx, default to 1 if no input or invalid input
        try:
            user_input = input("Enter the target face index (starting from 1, default is 1): ")
            target_face_idx = int(user_input) if user_input.strip() else 1
            if target_face_idx < 1:
                print("Invalid index. Using default value 1.")
                target_face_idx = 1
        except ValueError:
            print("Invalid input. Using default value 1.")
            target_face_idx = 1

        try:
            result = swapper.swap_faces(
                source_path=source_path,
                source_face_idx=1,
                target_path=target_path,
                target_face_idx=target_face_idx,
                swap_hair=True  # Enabled for testing
            )
        except ValueError as ve:
            # Retry with the first face only when the requested target index
            # was out of range; every other error propagates.
            if "Target image contains" in str(ve):
                print(f"Target face idx {target_face_idx} not found, trying with idx 1.")
                result = swapper.swap_faces(
                    source_path=source_path,
                    source_face_idx=1,
                    target_path=target_path,
                    target_face_idx=1,
                    swap_hair=True
                )
            else:
                raise

        output_path = os.path.join(output_dir, "swapped_face.jpg")
        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(output_path, result):
            raise IOError(f"Could not write result to: {output_path}")

        print(f"Face swap completed successfully. Result saved to: {output_path}")

    except Exception as e:
        print(f"Error occurred: {str(e)}")


if __name__ == "__main__":
    main()