#!/usr/bin/env python3
"""
FaceSpace Studio - Advanced Face Manipulation Platform
Combines face detection, enhancement, swapping, and style transfer
Optimized for Hugging Face Spaces deployment
"""

import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
import os
import tempfile
import subprocess
from pathlib import Path
import logging
from functools import lru_cache
from typing import Tuple, Optional, List, Dict
import warnings
import json
import time
from dataclasses import dataclass
from concurrent.futures import ThreadPoolExecutor
import threading

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
warnings.filterwarnings("ignore")


# Configuration
@dataclass
class Config:
    """Configuration for FaceSpace Studio"""
    # Evaluated once, when the class body is executed at import time.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    max_image_size: int = 1024
    face_detection_size: Tuple[int, int] = (640, 640)
    enhancement_steps: int = 20
    video_fps: int = 12
    max_video_frames: int = 60
    enable_face_swap: bool = True
    enable_style_transfer: bool = True
    cache_dir: str = "/tmp/facespace_cache"


config = Config()

# Global model registry
models = {
    "face_detector": None,
    "face_enhancer": None,
    "face_swapper": None,
    "style_transfer": None,
    "upscaler": None
}

# Thread lock for model loading
model_lock = threading.Lock()


def setup_environment():
    """Create the cache directory, apply GPU tuning flags and log the device."""
    os.makedirs(config.cache_dir, exist_ok=True)

    on_gpu = config.device == "cuda"
    if on_gpu:
        # GPU optimizations
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
        torch.backends.cudnn.benchmark = True

    logger.info("Device: %s", config.device)
    if on_gpu:
        logger.info("GPU: %s", torch.cuda.get_device_name(0))


@dataclass(eq=False)
class _DetectedFace:
    """Minimal stand-in for an InsightFace face object (OpenCV fallback).

    Only the attributes read by ``extract_faces`` are provided. ``eq=False``
    avoids the generated ``__eq__`` comparing numpy arrays.
    """
    bbox: np.ndarray
    det_score: float = 0.99
    landmark: Optional[np.ndarray] = None


class OpenCVFaceDetector:
    """Haar-cascade face detector exposing an InsightFace-like ``get``."""

    def __init__(self):
        self.cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        if self.cascade.empty():
            # Detection will raise later; make the root cause visible now.
            logger.error("OpenCV Haar cascade could not be loaded")

    def get(self, img):
        """Detect faces in a BGR image and return ``_DetectedFace`` objects."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        boxes = self.cascade.detectMultiScale(gray, 1.1, 4)
        # Convert (x, y, w, h) to the (x1, y1, x2, y2) layout used downstream.
        return [
            _DetectedFace(bbox=np.array([x, y, x + w, y + h]))
            for (x, y, w, h) in boxes
        ]


@lru_cache(maxsize=1)
def load_face_detector():
    """Load InsightFace with fallback options.

    Returns:
        An InsightFace ``FaceAnalysis`` app when it can be imported and
        prepared, otherwise an ``OpenCVFaceDetector``. Both expose
        ``get(bgr_image)``. The result is cached for the process lifetime.
    """
    try:
        # Try importing InsightFace
        from insightface.app import FaceAnalysis

        # Try GPU first, fallback to CPU
        providers = ['CPUExecutionProvider']
        if config.device == "cuda":
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

        app = FaceAnalysis(
            name='buffalo_l',
            providers=providers,
            allowed_modules=['detection', 'recognition']
        )
        app.prepare(
            ctx_id=0 if config.device == "cuda" else -1,
            det_size=config.face_detection_size
        )
        logger.info("InsightFace loaded successfully")
        return app
    except Exception as e:
        logger.warning("InsightFace not available: %s, using OpenCV fallback", e)
        # Fallback to OpenCV face detection
        return OpenCVFaceDetector()


@lru_cache(maxsize=1)
def load_enhancement_pipeline():
    """Load Stable Diffusion with optimizations.

    Returns:
        A ``StableDiffusionImg2ImgPipeline`` moved to ``config.device``, or
        ``None`` if loading failed. Because of ``lru_cache`` a failed load
        (``None``) is cached too and is not retried within the process.
    """
    try:
        from diffusers import StableDiffusionImg2ImgPipeline, DPMSolverMultistepScheduler

        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            torch_dtype=torch.float16 if config.device == "cuda" else torch.float32,
            safety_checker=None,
            requires_safety_checker=False
        )

        # Optimized scheduler
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(
            pipe.scheduler.config,
            use_karras_sigmas=True
        )
        pipe = pipe.to(config.device)

        # Memory optimizations
        if config.device == "cuda":
            pipe.enable_attention_slicing()
            pipe.enable_vae_slicing()
            try:
                pipe.enable_xformers_memory_efficient_attention()
            except Exception as e:
                # xformers is optional; run without it when unavailable.
                logger.debug("xformers attention not enabled: %s", e)

        logger.info("Enhancement pipeline loaded")
        return pipe
    except Exception as e:
        logger.error("Failed to load enhancement pipeline: %s", e)
        return None


def _ensure_models():
    """Populate the detector/enhancer registry entries and return them."""
    with model_lock:
        if models["face_detector"] is None:
            models["face_detector"] = load_face_detector()
        if models["face_enhancer"] is None:
            models["face_enhancer"] = load_enhancement_pipeline()
        return models["face_detector"], models["face_enhancer"]


def _largest_face(faces: List[Dict]) -> Dict:
    """Return the face dict whose (padded) bounding box has the largest area."""
    def area(face):
        x1, y1, x2, y2 = face['bbox']
        return (x2 - x1) * (y2 - y1)
    return max(faces, key=area)


def _summarize_faces(faces: List[Dict]) -> List[Dict]:
    """Reduce face dicts to JSON-safe fields (id, bbox, confidence)."""
    return [
        {
            'id': int(f['id']),
            'bbox': [int(v) for v in f['bbox']],
            'confidence': float(f['confidence']),
        }
        for f in faces or []
    ]


def extract_faces(image: Image.Image, detector) -> List[Dict]:
    """Extract all faces from image with metadata.

    Args:
        image: Input PIL image; converted to RGB before detection so
            grayscale/palette/RGBA inputs do not break the colour conversion.
        detector: Object with ``get(bgr_image)`` returning faces that have a
            ``bbox`` array and optionally ``det_score`` / ``landmark``.

    Returns:
        One dict per usable face with keys ``id``, ``image`` (padded RGB
        crop), ``bbox`` (padded, clipped ``(x1, y1, x2, y2)`` ints),
        ``confidence`` and ``landmarks``. Empty list when nothing is detected
        or detection fails.
    """
    try:
        rgb = np.array(image.convert("RGB"))
        # Convert to CV2 format
        img_cv2 = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

        # Detect faces
        faces = detector.get(img_cv2)
        if not faces:
            return []

        height, width = img_cv2.shape[:2]

        # Process each face
        face_data = []
        for idx, face in enumerate(faces):
            x1, y1, x2, y2 = (int(v) for v in face.bbox.astype(int)[:4])

            # Add padding (30% of the longer side), clipped to the image
            pad = int(max(x2 - x1, y2 - y1) * 0.3)
            x1 = max(0, x1 - pad)
            y1 = max(0, y1 - pad)
            x2 = min(width, x2 + pad)
            y2 = min(height, y2 + pad)

            if x2 <= x1 or y2 <= y1:
                # An empty crop would raise and discard every other face too.
                logger.warning("Skipping face %d with empty bounding box", idx)
                continue

            # Extract face
            face_pil = Image.fromarray(rgb[y1:y2, x1:x2].copy())

            face_data.append({
                'id': idx,
                'image': face_pil,
                'bbox': (x1, y1, x2, y2),
                'confidence': float(getattr(face, 'det_score', 0.99)),
                'landmarks': getattr(face, 'landmark', None)
            })

        return face_data
    except Exception as e:
        logger.error("Face extraction error: %s", e)
        return []


def enhance_face(face_img: Image.Image,
                 pipe,
                 prompt: str = "a beautiful person, detailed face, high quality",
                 strength: float = 0.6) -> Image.Image:
    """Enhance a single face using SD.

    Args:
        face_img: Face crop; resized to 512x512 before generation.
        pipe: Img2img pipeline callable returning an object with ``.images``.
        prompt: Text prompt for the pipeline.
        strength: Img2img strength passed to the pipeline.

    Returns:
        The generated image, or ``face_img`` (possibly already resized) when
        generation fails.
    """
    try:
        # Resize to optimal size
        face_img = face_img.convert("RGB").resize((512, 512), Image.LANCZOS)

        # Generate
        with torch.inference_mode():
            result = pipe(
                prompt=prompt,
                image=face_img,
                strength=strength,
                guidance_scale=7.5,
                num_inference_steps=config.enhancement_steps
            ).images[0]
        return result
    except Exception as e:
        logger.error("Enhancement error: %s", e)
        return face_img


def _alpha_blend(orig_array: np.ndarray,
                 face_array: np.ndarray,
                 bbox: Tuple[int, int, int, int]) -> np.ndarray:
    """Paste ``face_array`` into a copy of ``orig_array`` with feathered edges."""
    x1, y1, x2, y2 = bbox
    face_height, face_width = face_array.shape[:2]
    result = orig_array.copy()

    y_indices, x_indices = np.ogrid[:face_height, :face_width]

    # Distance from edges
    dist_from_edge = np.minimum.reduce([
        x_indices + 0 * y_indices,
        face_width - 1 - x_indices + 0 * y_indices,
        y_indices + 0 * x_indices,
        face_height - 1 - y_indices + 0 * x_indices
    ])

    # Feather edges; at least 1 px so tiny regions never divide by zero
    feather_width = max(1, min(face_width, face_height) // 8)
    mask = np.clip(dist_from_edge / feather_width, 0, 1)
    mask = mask[:, :, np.newaxis]

    # Blend
    alpha = 0.8
    result[y1:y2, x1:x2] = (
        face_array * mask * alpha +
        orig_array[y1:y2, x1:x2] * (1 - mask * alpha)
    ).astype(np.uint8)
    return result


def blend_face(original: Image.Image,
               face: Image.Image,
               bbox: Tuple[int, int, int, int],
               method: str = "poisson") -> Image.Image:
    """Blend enhanced face back into original image.

    Args:
        original: Image to paste into (not modified).
        face: Face image; resized to the bbox size.
        bbox: Target region ``(x1, y1, x2, y2)`` in ``original``.
        method: ``"poisson"`` for ``cv2.seamlessClone`` or ``"alpha"`` for
            feathered alpha blending. Poisson falls back to alpha when the
            region is 5 px or smaller on a side or when cloning fails; any
            other value is treated as ``"alpha"``.

    Returns:
        A new RGB image with the face blended in, or ``original`` unchanged
        when the bbox is empty or blending fails.
    """
    try:
        x1, y1, x2, y2 = (int(v) for v in bbox)
        face_width = x2 - x1
        face_height = y2 - y1
        if face_width <= 0 or face_height <= 0:
            logger.warning("Blending skipped: empty bounding box %s", (x1, y1, x2, y2))
            return original

        # Resize face to match bbox
        face = face.convert("RGB").resize((face_width, face_height), Image.LANCZOS)

        # Convert to arrays (RGB on both sides so channel counts agree)
        orig_array = np.array(original.convert("RGB"))
        face_array = np.array(face)

        if method == "poisson" and face_height > 5 and face_width > 5:
            try:
                # Create mask
                mask = np.ones(face_array.shape[:2], dtype=np.uint8) * 255

                # Calculate center
                center = (x1 + face_width // 2, y1 + face_height // 2)

                # Apply Poisson blending
                orig_cv2 = cv2.cvtColor(orig_array, cv2.COLOR_RGB2BGR)
                face_cv2 = cv2.cvtColor(face_array, cv2.COLOR_RGB2BGR)
                cloned = cv2.seamlessClone(
                    face_cv2, orig_cv2, mask, center, cv2.NORMAL_CLONE
                )
                return Image.fromarray(cv2.cvtColor(cloned, cv2.COLOR_BGR2RGB))
            except Exception as e:
                logger.warning("Poisson blend failed: %s, using alpha blend", e)

        # Simple alpha blending with feathering (also the fallback path)
        blended = _alpha_blend(orig_array, face_array, (x1, y1, x2, y2))
        return Image.fromarray(blended)
    except Exception as e:
        logger.error("Blending error: %s", e)
        return original


def process_image(image: Image.Image,
                  prompt: str = "beautiful person, detailed face",
                  strength: float = 0.6,
                  enhance_all: bool = True,
                  selected_faces: Optional[List[int]] = None) -> Tuple[Image.Image, str, List[Dict]]:
    """Main processing function for images.

    Args:
        image: Input image, or ``None`` when nothing was uploaded.
        prompt: Enhancement prompt forwarded to ``enhance_face``.
        strength: Img2img strength forwarded to ``enhance_face``.
        enhance_all: Enhance every detected face when true.
        selected_faces: Face ids to enhance when ``enhance_all`` is false.
            If empty/``None``, only the largest face is enhanced.

    Returns:
        ``(result_image, status_message, faces)`` where ``faces`` is the list
        produced by ``extract_faces``. ``result_image`` is ``None`` on errors
        and the unmodified input when no face is found.
    """
    if image is None:
        return None, "Please upload an image", []

    try:
        # Load models
        detector, enhancer = _ensure_models()
        if detector is None or enhancer is None:
            return None, "Models not loaded properly", []

        # Extract faces
        faces = extract_faces(image, detector)
        if not faces:
            return image, "No faces detected", []

        # Determine which faces to process
        if enhance_all:
            faces_to_process = faces
        elif selected_faces:
            wanted = set(selected_faces)
            faces_to_process = [f for f in faces if f['id'] in wanted]
        else:
            faces_to_process = [_largest_face(faces)]  # Process largest face

        # Process each face
        result = image.copy()
        processed_count = 0

        for face_data in faces_to_process:
            try:
                # Enhance face
                enhanced = enhance_face(
                    face_data['image'], enhancer, prompt, strength
                )
                # Blend back
                result = blend_face(result, enhanced, face_data['bbox'])
                processed_count += 1
            except Exception as e:
                logger.error("Error processing face %s: %s", face_data['id'], e)

        # Clear GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        status = f"✅ Enhanced {processed_count}/{len(faces)} faces"
        return result, status, faces
    except Exception as e:
        logger.error("Processing error: %s", e)
        return None, f"Error: {str(e)}", []


def swap_faces(source_image: Image.Image,
               target_image: Image.Image,
               mode: str = "Single Face",
               preserve_expression: bool = True) -> Tuple[Image.Image, str]:
    """Swap faces between images using enhanced source face.

    Args:
        source_image: Image providing the face to copy (its largest face).
        target_image: Image whose face(s) are replaced.
        mode: ``"Single Face"`` (first target face), ``"All Faces"``, or any
            other value, which currently behaves like ``"Single Face"``.
        preserve_expression: When true and the enhancer is available, the
            source is mixed with the target crop and re-generated; Poisson
            blending is used. Otherwise the resized source is pasted with
            alpha blending.

    Returns:
        ``(result_image, status_message)``; ``result_image`` is ``None`` when
        inputs are missing, no face is found, or an error occurs.
    """
    if source_image is None or target_image is None:
        return None, "Please provide both source and target images"

    try:
        # Load models
        detector, enhancer = _ensure_models()
        if detector is None:
            return None, "Face detector not loaded"

        # Extract faces
        source_faces = extract_faces(source_image, detector)
        target_faces = extract_faces(target_image, detector)

        if not source_faces:
            return None, "No face detected in source image"
        if not target_faces:
            return None, "No face detected in target image"

        # Get source face (use the largest)
        source_face = _largest_face(source_faces)['image']

        # Determine which target faces to swap
        if mode == "All Faces":
            faces_to_swap = target_faces
        else:
            # "Single Face", and for selected faces just use first for now
            faces_to_swap = [target_faces[0]]

        # Process swapping
        result = target_image.copy()
        swapped_count = 0

        for target_face in faces_to_swap:
            try:
                # Resize source face to match target
                target_size = target_face['image'].size
                source_resized = source_face.resize(target_size, Image.LANCZOS)

                if enhancer is not None and preserve_expression:
                    # Use SD to blend features while preserving expression
                    prompt = "person, natural expression, photorealistic face"

                    # Blend source and target for expression preservation
                    blended = Image.blend(source_resized, target_face['image'], 0.3)

                    # Enhance the blended face
                    swapped_face = enhance_face(
                        blended, enhancer, prompt, strength=0.7
                    )
                else:
                    # Direct swap without enhancement
                    swapped_face = source_resized

                # Blend back into target image
                result = blend_face(
                    result,
                    swapped_face,
                    target_face['bbox'],
                    method="poisson" if preserve_expression else "alpha"
                )
                swapped_count += 1
            except Exception as e:
                logger.error("Error swapping face: %s", e)

        # Clear GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        status = f"✅ Swapped {swapped_count} face(s)"
        return result, status
    except Exception as e:
        logger.error("Face swap error: %s", e)
        return None, f"Error: {str(e)}"


def _enhance_for_ui(image, prompt, strength, enhance_all):
    """Gradio handler: run ``process_image`` and make the face list JSON-safe.

    The raw face dicts hold PIL images and numpy values, which are not JSON
    data, so only id/bbox/confidence are forwarded to the ``gr.JSON`` output.
    """
    result, status, faces = process_image(image, prompt, strength, enhance_all)
    return result, status, _summarize_faces(faces)


def create_interface():
    """Create Gradio interface with all features.

    Returns:
        The ``gr.Blocks`` app with the enhancement and face-swap tabs wired
        to their handlers, plus placeholder tabs for upcoming features.
    """
    with gr.Blocks(
        title="🎭 FaceSpace Studio",
        theme=gr.themes.Soft(
            primary_hue="purple",
            secondary_hue="blue"
        ),
        css="""
        .gradio-container {
            max-width: 1200px;
            margin: auto;
        }
        .face-box {
            border: 2px solid #9333ea;
            border-radius: 8px;
            padding: 10px;
            margin: 5px;
        }
        """
    ) as demo:

        gr.Markdown("""
        # 🎭 FaceSpace Studio - Advanced Face Manipulation

        **Features**: Face Detection • Enhancement • Style Transfer • Batch Processing

        Powered by InsightFace + Stable Diffusion + Advanced Blending
        """)

        with gr.Tabs():
            # Face Enhancement Tab
            with gr.TabItem("✨ Face Enhancement"):
                with gr.Row():
                    with gr.Column():
                        input_image = gr.Image(
                            label="Upload Image",
                            type="pil"
                        )

                        prompt = gr.Textbox(
                            label="Enhancement Prompt",
                            value="beautiful person, detailed face, professional photo",
                            lines=2
                        )

                        with gr.Row():
                            strength = gr.Slider(
                                label="Enhancement Strength",
                                minimum=0.1,
                                maximum=0.9,
                                value=0.6,
                                step=0.1
                            )

                            enhance_all = gr.Checkbox(
                                label="Enhance All Faces",
                                value=True
                            )

                        enhance_btn = gr.Button(
                            "✨ Enhance Faces",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column():
                        output_image = gr.Image(
                            label="Enhanced Result"
                        )

                        status_text = gr.Textbox(
                            label="Status",
                            interactive=False
                        )

                        face_info = gr.JSON(
                            label="Detected Faces",
                            visible=False
                        )

            # Face Swap Tab
            with gr.TabItem("🔄 Face Swap"):
                with gr.Row():
                    with gr.Column():
                        source_img = gr.Image(
                            label="Source Face (to copy)",
                            type="pil"
                        )

                        target_img = gr.Image(
                            label="Target Image (to paste into)",
                            type="pil"
                        )

                        swap_mode = gr.Radio(
                            choices=["Single Face", "All Faces", "Selected Faces"],
                            value="Single Face",
                            label="Swap Mode"
                        )

                        preserve_expression = gr.Checkbox(
                            label="Preserve Target Expression",
                            value=True
                        )

                        swap_btn = gr.Button(
                            "🔄 Swap Faces",
                            variant="primary",
                            size="lg"
                        )

                    with gr.Column():
                        swap_result = gr.Image(
                            label="Swapped Result"
                        )

                        swap_status = gr.Textbox(
                            label="Status",
                            interactive=False
                        )

                        gr.Markdown("""
                        ### Tips:
                        - Source image should have a clear face
                        - Works best with similar face angles
                        - Enable expression preservation for natural results
                        """)

                # Face swap handler
                swap_btn.click(
                    fn=swap_faces,
                    inputs=[source_img, target_img, swap_mode, preserve_expression],
                    outputs=[swap_result, swap_status]
                )

            # Style Transfer Tab (Placeholder)
            with gr.TabItem("🎨 Style Transfer"):
                gr.Markdown("""
                ### Style Transfer - Coming Soon!

                Features in development:
                - Artistic styles (oil painting, sketch, anime)
                - Age progression/regression
                - Gender transformation
                - Celebrity style transfer
                """)

            # Batch Processing Tab (Placeholder)
            with gr.TabItem("📦 Batch Processing"):
                gr.Markdown("""
                ### Batch Processing - Coming Soon!

                Features in development:
                - Process multiple images
                - Video frame extraction
                - Folder upload/download
                - Progress tracking
                """)

        # Event handlers
        enhance_btn.click(
            fn=_enhance_for_ui,
            inputs=[input_image, prompt, strength, enhance_all],
            outputs=[output_image, status_text, face_info]
        )

        gr.Markdown("""
        ---
        ### 🔧 Technical Details
        - **Face Detection**: InsightFace buffalo_l / OpenCV fallback
        - **Enhancement**: Stable Diffusion v1.5 with DPM++ scheduler
        - **Blending**: Poisson seamless cloning + Alpha feathering
        - **Optimization**: GPU acceleration, XFormers, VAE slicing

        Made with ❤️ using advanced AI models
        """)

    return demo


# Initialize environment
setup_environment()

# Create interface
demo = create_interface()

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )