# ShinyySwapper / SinglePhoto.py
# Uploaded by ShinyyMineyyON ("Upload 112 files", revision 2b9f9c9, verified)
import cv2
import os
import numpy as np
import threading
import concurrent.futures
from typing import List, Tuple, Optional
import time
# Try to import insightface, but provide a fallback if it is not available.
# Three module-level names are always defined afterwards:
#   INSIGHTFACE_AVAILABLE - True only when both imports below succeeded
#   insightface           - the imported package, or None in fallback mode
#   FaceAnalysis          - the imported class, or None in fallback mode
try:
    import insightface
    from insightface.app import FaceAnalysis
    INSIGHTFACE_AVAILABLE = True
except ImportError as e:
    print(f"Warning: insightface not available: {e}")
    print("Using OpenCV fallback for face detection")
    INSIGHTFACE_AVAILABLE = False
    insightface = None
    FaceAnalysis = None
class FaceSwapper:
    """Swap faces between two images with insightface detection and inswapper.

    When insightface could not be imported the instance is still created, but
    ``app`` and ``swapper`` are ``None``: the swap methods then raise
    ``RuntimeError`` and only ``count_faces`` (OpenCV Haar cascade) is usable.
    """

    def __init__(self, gpu_enabled=True, gpu_id=0):
        """
        Initialize FaceSwapper with GPU acceleration support

        Args:
            gpu_enabled: Whether to use GPU acceleration
            gpu_id: GPU device ID (default 0 for RX 5500 XT)
        """
        self.gpu_enabled = gpu_enabled
        self.gpu_id = gpu_id
        self.ctx_id = gpu_id if gpu_enabled else -1
        # Larger detection input = more accurate but more VRAM usage.
        # Stored so get_gpu_info reports what was actually configured.
        self.det_size = (1024, 1024) if gpu_enabled else (640, 640)
        print(f"Initializing FaceSwapper with GPU {'enabled' if gpu_enabled else 'disabled'} (ctx_id={self.ctx_id})")

        if INSIGHTFACE_AVAILABLE:
            # Detection + landmark models
            self.app = FaceAnalysis(name='buffalo_l')
            self.app.prepare(ctx_id=self.ctx_id, det_size=self.det_size)
            # Swapper model
            self.swapper = insightface.model_zoo.get_model(
                'inswapper_128.onnx', download=True, download_zip=True
            )
        else:
            print("Using OpenCV fallback mode - limited functionality")
            self.app = None
            self.swapper = None

        # gpu_error stays None only when DirectML was configured successfully.
        self.gpu_error = None
        if INSIGHTFACE_AVAILABLE and gpu_enabled and hasattr(self.swapper, 'session'):
            try:
                # Use DirectML for AMD GPUs, fallback to CPU
                providers = ['DmlExecutionProvider', 'CPUExecutionProvider']
                self.swapper.session.set_providers(providers)
                actual_providers = self.swapper.session.get_providers()
                print(f"GPU providers configured: {actual_providers}")
                # Check if DirectML is actually being used
                if 'DmlExecutionProvider' in actual_providers:
                    print("✅ GPU acceleration successfully enabled with DirectML (AMD RX 5500 XT)")
                else:
                    print("⚠️ DirectML provider not available, falling back to CPU")
                    self.gpu_enabled = False
                    self.ctx_id = -1
                    self.gpu_error = "DirectML provider not available"
            except Exception as e:
                print(f"❌ GPU configuration failed, falling back to CPU: {e}")
                self.gpu_enabled = False
                self.ctx_id = -1
                self.gpu_error = str(e)
        elif not INSIGHTFACE_AVAILABLE:
            self.gpu_error = "insightface not available - using OpenCV fallback"
        elif not gpu_enabled:
            self.gpu_error = "GPU acceleration disabled by user"
        else:
            self.gpu_error = "Swapper session not available for GPU configuration"

        # Performance tracking
        self.last_processing_time = 0
        self.gpu_memory_usage = 0

    def get_gpu_info(self):
        """Return a dict describing the current GPU configuration.

        The dict always contains ``gpu_enabled`` and ``insightface_available``;
        the remaining keys depend on whether the GPU path is active and on
        whether onnxruntime could be queried.
        """
        if not self.gpu_enabled or not INSIGHTFACE_AVAILABLE:
            return {
                "gpu_enabled": False,
                "message": "GPU acceleration disabled or not available",
                "error": getattr(self, 'gpu_error', 'Unknown error'),
                "ctx_id": self.ctx_id,
                "insightface_available": INSIGHTFACE_AVAILABLE
            }
        try:
            import onnxruntime as ort
            providers = ort.get_available_providers()
            current_providers = getattr(self.swapper.session, 'get_providers', lambda: ['Unknown'])()
            return {
                "gpu_enabled": True,
                "gpu_id": self.gpu_id,
                "available_providers": providers,
                "current_providers": current_providers,
                "ctx_id": self.ctx_id,
                "directml_available": 'DmlExecutionProvider' in current_providers,
                "detection_size": getattr(self, 'det_size', (1024, 1024)),
                "insightface_available": INSIGHTFACE_AVAILABLE
            }
        except Exception as e:
            return {
                "gpu_enabled": False,
                "error": str(e),
                "fallback_reason": "GPU info retrieval failed",
                "insightface_available": INSIGHTFACE_AVAILABLE
            }

    @staticmethod
    def _clamp_bbox(bbox, img_shape):
        """Return ``bbox`` as integer ``(x1, y1, x2, y2)`` clipped to the image.

        Detector boxes may extend past the image border; unclamped negative
        values would be interpreted by NumPy slicing as offsets from the end.
        """
        height, width = img_shape[:2]
        x1, y1, x2, y2 = (int(v) for v in bbox)
        x1 = min(max(x1, 0), width)
        x2 = min(max(x2, 0), width)
        y1 = min(max(y1, 0), height)
        y2 = min(max(y2, 0), height)
        return x1, y1, x2, y2

    def transplant_hair(self, src_img, dst_img, src_face, dst_face):
        """
        Warps the source hair onto the destination face using Affine Transformation.

        Args:
            src_img: Source image in the SAME coordinate frame as ``src_face.kps``.
            dst_img: Image that receives the hair region.
            src_face: Source face; its first three ``kps`` points are used.
            dst_face: Destination face; its first three ``kps`` points are used.

        Returns:
            A new uint8 image the size of ``dst_img``.
        """
        src_lm = np.ascontiguousarray(src_face.kps, dtype=np.float32)
        dst_lm = np.ascontiguousarray(dst_face.kps, dtype=np.float32)
        h, w = dst_img.shape[:2]

        # Estimate the hairline from the landmarks (points 0, 1 are assumed to
        # be the eyes and point 2 the nose, as in 5-point insightface output).
        eye_y = int((dst_lm[0][1] + dst_lm[1][1]) / 2)  # Average eye height
        nose_y = int(dst_lm[2][1])
        face_height = nose_y - eye_y
        forehead_line = int(eye_y - (face_height * 0.8))
        if forehead_line <= 0:
            # No room above the forehead: the mask would be empty, nothing to blend.
            return dst_img.copy()

        # Align the whole source image to the destination geometry.
        M = cv2.getAffineTransform(src_lm[:3], dst_lm[:3])
        warped_src = cv2.warpAffine(src_img, M, (w, h), borderMode=cv2.BORDER_REFLECT)

        # Everything above the forehead line is taken from the source; the
        # heavy blur turns the hard edge into a soft gradient.
        hair_mask = np.zeros((h, w, 3), dtype=np.float32)
        cv2.rectangle(hair_mask, (0, 0), (w, forehead_line), (1, 1, 1), -1)
        hair_mask = cv2.GaussianBlur(hair_mask, (51, 51), 0)

        # Blend directly in the 0..255 range and round, so pixels where the
        # mask is zero come back bit-identical instead of being truncated.
        blended = (warped_src.astype(np.float32) * hair_mask
                   + dst_img.astype(np.float32) * (1.0 - hair_mask))
        return np.clip(np.rint(blended), 0, 255).astype(np.uint8)

    def enhance_face_alignment(self, source_img, target_img, source_face, target_face):
        """
        Enhanced face alignment using facial landmarks for better positioning

        Returns the source image warped into the target's frame, or the
        unmodified ``source_img`` when no transform could be estimated.
        NOTE: after warping, ``source_face.kps`` no longer describes the
        returned image.
        """
        try:
            src_pts = np.array(source_face.kps, dtype=np.float32)
            dst_pts = np.array(target_face.kps, dtype=np.float32)
            h, w = target_img.shape[:2]
            # Partial affine estimated from the first three landmarks
            M = cv2.estimateAffinePartial2D(src_pts[:3], dst_pts[:3])[0]
            if M is None:
                return source_img
            return cv2.warpAffine(source_img, M, (w, h),
                                  borderMode=cv2.BORDER_REFLECT_101)
        except Exception as e:
            print(f"Face alignment enhancement failed: {e}")
            return source_img

    def improve_color_matching(self, swapped_face, target_region, target_face_bbox):
        """
        Advanced color matching using LAB color space and histogram matching

        Args:
            swapped_face: BGR crop of the swapped face.
            target_region: BGR crop of the original target face.
            target_face_bbox: Unused; kept for interface compatibility.

        Returns:
            The colour-matched crop, or ``swapped_face`` unchanged on failure.
        """
        try:
            swapped_lab = cv2.cvtColor(swapped_face, cv2.COLOR_BGR2LAB)
            target_lab = cv2.cvtColor(target_region, cv2.COLOR_BGR2LAB)
            for channel in range(3):  # L, A, B channels
                swapped_hist = cv2.calcHist([swapped_lab], [channel], None, [256], [0, 256])
                target_hist = cv2.calcHist([target_lab], [channel], None, [256], [0, 256])
                # Normalize histograms
                swapped_hist = swapped_hist / swapped_hist.sum()
                target_hist = target_hist / target_hist.sum()
                lut = self._create_histogram_lut(swapped_hist, target_hist)
                # Table lookup through NumPy indexing (uint8 values index the 256-entry LUT)
                swapped_lab[:, :, channel] = lut[swapped_lab[:, :, channel]]
            enhanced_face = cv2.cvtColor(swapped_lab, cv2.COLOR_LAB2BGR)
            # Blend with original to maintain natural look
            alpha = 0.7  # 70% enhanced, 30% original
            return cv2.addWeighted(enhanced_face, alpha, swapped_face, 1 - alpha, 0)
        except Exception as e:
            print(f"Color matching enhancement failed: {e}")
            return swapped_face

    def _create_histogram_lut(self, source_hist, target_hist):
        """
        Create lookup table for histogram matching

        For every source level, picks the target level whose cumulative
        frequency is closest (first index wins on ties). Both inputs are
        256-bin histograms; any shape that flattens to 256 values is accepted.
        """
        source_cdf = np.asarray(source_hist).ravel().cumsum()
        target_cdf = np.asarray(target_hist).ravel().cumsum()
        # 256x256 distance table: rows = source levels, columns = target levels
        distance = np.abs(target_cdf[np.newaxis, :] - source_cdf[:, np.newaxis])
        return distance.argmin(axis=1).astype(np.uint8)

    def seamless_blending(self, swapped_face, target_img, target_face_bbox):
        """
        Seamless blending using multi-band blending for natural integration

        The face crop and the matching target region are decomposed into
        Laplacian bands, mixed per band with a blurred elliptical mask, and
        collapsed back into an image.

        Args:
            swapped_face: Crop to blend in; resized if it does not match the
                clamped bounding box.
            target_img: Full target image (not modified).
            target_face_bbox: ``(x1, y1, x2, y2)``; clamped to the image.

        Returns:
            A new uint8 image. If blending fails the crop is pasted without
            blending; if the box is empty a plain copy of ``target_img``.
        """
        x1, y1, x2, y2 = self._clamp_bbox(target_face_bbox, target_img.shape)
        region_h, region_w = y2 - y1, x2 - x1
        if region_h <= 0 or region_w <= 0:
            return target_img.copy()
        try:
            if swapped_face.shape[:2] != (region_h, region_w):
                swapped_face = cv2.resize(swapped_face, (region_w, region_h))

            # The ellipse is drawn from the unclamped box so a face that
            # leaves the frame keeps its true centre; OpenCV clips the drawing.
            bx1, by1, bx2, by2 = (int(v) for v in target_face_bbox)
            mask = np.zeros(target_img.shape[:2], dtype=np.uint8)
            center = (int((bx1 + bx2) / 2), int((by1 + by2) / 2))
            axes = (int((bx2 - bx1) / 2), int((by2 - by1) / 2))
            cv2.ellipse(mask, center, axes, 0, 0, 360, (255, 255, 255), -1)
            # Apply Gaussian blur to mask for smooth edges
            mask_blurred = cv2.GaussianBlur(mask, (101, 101), 0).astype(np.float32) / 255.0
            region_mask = np.ascontiguousarray(mask_blurred[y1:y2, x1:x2])

            # At most 5 levels, fewer for tiny regions so no level is halved below 1 px.
            levels = max(1, min(5, min(region_h, region_w).bit_length()))
            target_region = target_img[y1:y2, x1:x2].astype(np.float32)
            face_bands = self._create_laplacian_pyramid(swapped_face.astype(np.float32), levels)
            target_bands = self._create_laplacian_pyramid(target_region, levels)
            mask_levels = self._create_gaussian_pyramid(region_mask, levels)

            blended_pyramid = []
            for face_band, target_band, weight in zip(face_bands, target_bands, mask_levels):
                if face_band.ndim == 3:
                    # Mask is single-channel; add an axis so it broadcasts over colour channels.
                    weight = weight[..., np.newaxis]
                blended_pyramid.append(face_band * weight + target_band * (1.0 - weight))

            blended_face = self._reconstruct_from_pyramid(blended_pyramid)
            result = target_img.copy()
            # Clip and round before the uint8 cast to avoid wrap-around.
            result[y1:y2, x1:x2] = np.clip(np.rint(blended_face), 0, 255).astype(np.uint8)
            return result
        except Exception as e:
            print(f"Seamless blending failed: {e}")
            # Fallback to simple paste
            result = target_img.copy()
            if swapped_face.shape[:2] == (region_h, region_w):
                result[y1:y2, x1:x2] = swapped_face
            return result

    def _create_gaussian_pyramid(self, img, levels):
        """
        Create Gaussian pyramid for multi-band blending

        Returns ``levels`` images: ``img`` itself followed by successive
        ``cv2.pyrDown`` reductions.
        """
        pyramid = [img]
        for _ in range(levels - 1):
            pyramid.append(cv2.pyrDown(pyramid[-1]))
        return pyramid

    def _upsample_to(self, img, shape):
        """Upsample ``img`` with ``cv2.pyrUp`` and force it to ``shape[:2]``."""
        upsampled = cv2.pyrUp(img)
        if upsampled.shape[:2] != tuple(shape[:2]):
            upsampled = cv2.resize(upsampled, (shape[1], shape[0]))
        return upsampled

    def _create_laplacian_pyramid(self, img, levels):
        """Create a Laplacian pyramid: band-pass levels plus the coarsest Gaussian level.

        Uses the same upsampling as ``_reconstruct_from_pyramid`` so that
        collapsing the returned bands reproduces ``img``.
        """
        gaussian = self._create_gaussian_pyramid(img, levels)
        bands = [fine - self._upsample_to(coarse, fine.shape)
                 for fine, coarse in zip(gaussian, gaussian[1:])]
        bands.append(gaussian[-1])
        return bands

    def _reconstruct_from_pyramid(self, pyramid):
        """
        Reconstruct image from a pyramid by upsampling and adding each level

        This is the collapse step for a Laplacian (band-pass) pyramid; feeding
        it plain Gaussian levels would sum full images.
        """
        result = pyramid[-1]
        for level in reversed(pyramid[:-1]):
            result = self._upsample_to(result, level.shape) + level
        return result

    def _require_models(self):
        """Raise RuntimeError when the insightface models were not loaded."""
        if self.app is None or self.swapper is None:
            raise RuntimeError(
                "Face swapping requires insightface, which is not available "
                "(OpenCV fallback mode only supports count_faces)"
            )

    @staticmethod
    def _load_image_pair(source_path, target_path):
        """Read both images from disk; raise ValueError if either cannot be read."""
        source_img = cv2.imread(source_path)
        target_img = cv2.imread(target_path)
        if source_img is None or target_img is None:
            raise ValueError("Could not read one or both images")
        return source_img, target_img

    def _detect_sorted_faces(self, img):
        """Detect faces in ``img`` and return them sorted from left to right."""
        return sorted(self.app.get(img), key=lambda face: face.bbox[0])

    @staticmethod
    def _select_face(faces, face_idx, role):
        """Return the 1-based ``face_idx``-th face or raise ValueError.

        ``role`` is "Source" or "Target" and only affects the error text.
        """
        if len(faces) < face_idx or face_idx < 1:
            raise ValueError(f"{role} image contains {len(faces)} faces, but requested face {face_idx}")
        return faces[face_idx - 1]

    def swap_faces(self, source_path, source_face_idx, target_path, target_face_idx, swap_hair=False):
        """Swap one face from the source image file onto one face of the target image file.

        Args:
            source_path: Path of the image providing the face.
            source_face_idx: 1-based index (left to right) of the source face.
            target_path: Path of the image receiving the face.
            target_face_idx: 1-based index (left to right) of the target face.
            swap_hair: Also transplant the region above the forehead.

        Returns:
            The resulting image.

        Raises:
            ValueError: An image cannot be read or a face index is out of range.
            RuntimeError: insightface models are not available.
        """
        start_time = time.time()
        source_img, target_img = self._load_image_pair(source_path, target_path)
        self._require_models()
        source_faces = self._detect_sorted_faces(source_img)
        target_faces = self._detect_sorted_faces(target_img)

        result = self._swap_single_face(
            source_img, target_img, source_faces, target_faces,
            source_face_idx, target_face_idx, swap_hair,
            hair_error_label="Hair swap failed (fallback to enhanced swap)"
        )

        self.last_processing_time = time.time() - start_time
        print(f"Face swap completed in {self.last_processing_time:.2f}s (GPU: {'Yes' if self.gpu_enabled else 'No'})")
        return result

    def swap_faces_batch(self, source_path: str, target_path: str,
                         source_face_indices: Optional[List[int]] = None,
                         target_face_indices: Optional[List[int]] = None,
                         swap_hair: bool = False) -> List[np.ndarray]:
        """
        Batch face swapping for every (source index, target index) combination

        Faces are detected once. Combinations run on a thread pool when the
        GPU is enabled and there is more than one combination, otherwise
        sequentially. Failed combinations are reported and skipped, so the
        returned list may be shorter than the number of combinations.

        Raises:
            ValueError: An image cannot be read.
            RuntimeError: insightface models are not available.
        """
        if source_face_indices is None:
            source_face_indices = [1]
        if target_face_indices is None:
            target_face_indices = [1]

        source_img, target_img = self._load_image_pair(source_path, target_path)
        self._require_models()

        # Detect all faces once
        print("Detecting faces in source and target images...")
        source_faces = self._detect_sorted_faces(source_img)
        target_faces = self._detect_sorted_faces(target_img)

        if self.gpu_enabled and len(source_face_indices) * len(target_face_indices) > 1:
            print(f"Processing {len(source_face_indices)}x{len(target_face_indices)} combinations in parallel on GPU...")
            return self._process_parallel_swaps(
                source_img, target_img, source_faces, target_faces,
                source_face_indices, target_face_indices, swap_hair
            )

        # Sequential processing for single combinations or CPU fallback
        print(f"Processing {len(source_face_indices)}x{len(target_face_indices)} combinations sequentially...")
        results = []
        for s_idx in source_face_indices:
            for t_idx in target_face_indices:
                try:
                    results.append(self._swap_single_face(
                        source_img, target_img, source_faces, target_faces,
                        s_idx, t_idx, swap_hair
                    ))
                except Exception as e:
                    print(f"Failed to swap source face {s_idx} with target face {t_idx}: {e}")
        return results

    def _process_parallel_swaps(self, source_img, target_img, source_faces, target_faces,
                                source_indices, target_indices, swap_hair):
        """Run every source/target combination on a thread pool.

        Results are collected in submission order (source-major); failed or
        timed-out combinations are reported and omitted.
        """
        combinations = [(s_idx, t_idx) for s_idx in source_indices for t_idx in target_indices]
        if not combinations:
            # ThreadPoolExecutor rejects max_workers=0
            return []

        def process_combination(s_idx, t_idx):
            try:
                # Each task works on private copies of the images.
                return self._swap_single_face(
                    source_img.copy(), target_img.copy(),
                    source_faces, target_faces, s_idx, t_idx, swap_hair
                )
            except Exception as e:
                print(f"Parallel swap failed for {s_idx}x{t_idx}: {e}")
                return None

        results = []
        max_workers = min(4, len(combinations))  # Limit for RX 5500 XT
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                (executor.submit(process_combination, s_idx, t_idx), s_idx, t_idx)
                for s_idx, t_idx in combinations
            ]
            for future, s_idx, t_idx in futures:
                try:
                    result = future.result(timeout=30)  # 30 second wait per swap
                except concurrent.futures.TimeoutError:
                    print(f"Timeout swapping source face {s_idx} with target face {t_idx}")
                except Exception as e:
                    print(f"Error in parallel processing {s_idx}x{t_idx}: {e}")
                else:
                    if result is not None:
                        results.append(result)
                        print(f"Completed swap: Source {s_idx} -> Target {t_idx}")
        return results

    def _swap_single_face(self, source_img, target_img, source_faces, target_faces,
                          source_idx, target_idx, swap_hair,
                          hair_error_label="Hair swap failed"):
        """Single face swap with all enhancements

        Args:
            source_img, target_img: Already loaded images.
            source_faces, target_faces: Detected faces, sorted left to right.
            source_idx, target_idx: 1-based indices into those lists.
            swap_hair: Also transplant the region above the forehead.
            hair_error_label: Prefix of the message printed when the hair step fails.

        Raises:
            ValueError: A face index is out of range.
        """
        source_face = self._select_face(source_faces, source_idx, "Source")
        target_face = self._select_face(target_faces, target_idx, "Target")

        # Core swap; the model pastes the new face back into a copy of the target.
        result = self.swapper.get(target_img, target_face, source_face, paste_back=True)

        # Colour-match and re-blend only the face box, clamped to the image.
        x1, y1, x2, y2 = self._clamp_bbox(target_face.bbox, target_img.shape)
        if x2 > x1 and y2 > y1:
            swapped_face_region = result[y1:y2, x1:x2]
            target_face_region = target_img[y1:y2, x1:x2]
            enhanced_face = self.improve_color_matching(
                swapped_face_region, target_face_region, target_face.bbox)
            result = self.seamless_blending(enhanced_face, target_img, target_face.bbox)

        if swap_hair:
            try:
                # The ORIGINAL source image is passed: transplant_hair does its
                # own landmark alignment, and source_face.kps is only valid in
                # the original source frame.
                result = self.transplant_hair(source_img, result, source_face, target_face)
            except Exception as e:
                print(f"{hair_error_label}: {e}")
        return result

    def optimize_for_gpu_memory(self, max_faces_per_batch=4, total_vram_gb=8):
        """
        Pick a batch size that fits the available VRAM

        Args:
            max_faces_per_batch: Upper bound requested by the caller.
            total_vram_gb: Total VRAM of the card in GB (default 8, RX 5500 XT).

        Returns:
            ``max_faces_per_batch`` unchanged when the GPU is disabled,
            otherwise the smaller of it and the VRAM-derived limit.
        """
        if not self.gpu_enabled:
            return max_faces_per_batch
        vram_safety_margin = 2  # GB reserved for system
        estimated_vram_per_face = 0.5  # GB per high-res face processing
        available_vram = total_vram_gb - vram_safety_margin
        # Never let the VRAM estimate push the limit below one face.
        vram_limit = max(1, int(available_vram / estimated_vram_per_face))
        optimal_batch_size = min(max_faces_per_batch, vram_limit)
        print(f"GPU VRAM optimization: {available_vram}GB available, batch size: {optimal_batch_size}")
        return optimal_batch_size

    def count_faces(self, img_path):
        """
        Counts the number of faces in the given image file.

        Uses OpenCV's frontal-face Haar cascade, so it works without insightface.

        Raises:
            ValueError: The image cannot be read.
        """
        img = cv2.imread(img_path)
        if img is None:
            raise ValueError(f"Could not read image: {img_path}")
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)
        return len(faces)
def main():
    """Command-line entry point: swap face 1 of data_src.jpg onto a chosen face of data_dst.jpg.

    Reads ``SinglePhoto/data_src.jpg`` and ``SinglePhoto/data_dst.jpg``, asks
    for the 1-based target face index, and writes
    ``SinglePhoto/output/swapped_face.jpg``. Errors are printed, not raised.
    """
    # Paths relative to root
    source_path = os.path.join("SinglePhoto", "data_src.jpg")
    target_path = os.path.join("SinglePhoto", "data_dst.jpg")
    output_dir = os.path.join("SinglePhoto", "output")
    os.makedirs(output_dir, exist_ok=True)

    swapper = FaceSwapper()
    try:
        # Ask user for target_face_idx, default to 1 if no input or invalid input
        try:
            user_input = input("Enter the target face index (starting from 1, default is 1): ")
            target_face_idx = int(user_input) if user_input.strip() else 1
            if target_face_idx < 1:
                print("Invalid index. Using default value 1.")
                target_face_idx = 1
        except (ValueError, EOFError):
            # EOFError: stdin closed (non-interactive run) - treat like empty input
            print("Invalid input. Using default value 1.")
            target_face_idx = 1

        try:
            # swap_hair is enabled here for testing
            result = swapper.swap_faces(
                source_path=source_path,
                source_face_idx=1,
                target_path=target_path,
                target_face_idx=target_face_idx,
                swap_hair=True
            )
        except ValueError as ve:
            # Retry with face 1 only when the requested TARGET index was the
            # problem and the retry would actually differ from the first call.
            if "Target image contains" not in str(ve) or target_face_idx == 1:
                raise
            print(f"Target face idx {target_face_idx} not found, trying with idx 1.")
            result = swapper.swap_faces(
                source_path=source_path,
                source_face_idx=1,
                target_path=target_path,
                target_face_idx=1,
                swap_hair=True
            )

        output_path = os.path.join(output_dir, "swapped_face.jpg")
        # cv2.imwrite signals failure through its return value, not an exception
        if not cv2.imwrite(output_path, result):
            raise IOError(f"Could not write result to: {output_path}")
        print(f"Face swap completed successfully. Result saved to: {output_path}")
    except Exception as e:
        print(f"Error occurred: {str(e)}")
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()