# Hugging Face Space: malazjanbeih/swapface (status: Paused)
# File size: 19,149 bytes

# ============================================================================
# ULTIMATE FACE SWAP - 100% QUALITY + HEAD SWAP (WITH HAIR!)
# Fixed for Hugging Face Spaces deployment
# ============================================================================

print("=" * 80)
print("ULTIMATE FACE SWAP - 100% QUALITY + HEAD SWAP MODE!")
print("=" * 80)

import subprocess
import sys

# Step 1: install the runtime dependencies with the interpreter that is
# running this script, so the packages land in the same environment.
print("\n[1/7] Installing packages...")
_pip_packages = [
    "gradio==3.50.2",
    "insightface==0.7.3",
    "onnxruntime",
    "opencv-python-headless",
    "moviepy==1.0.3",
    "numpy",
    "scipy",
    "tqdm",
    "gfpgan",
    "basicsr",
    "facexlib",
    "torch",
    "torchvision",
]
# check_call raises CalledProcessError if pip exits with a non-zero status.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-q", *_pip_packages]
)
print("✓ Installed")

print("\n[2/7] Importing libraries...")
import gradio as gr
import cv2
import numpy as np
import os
import tempfile
from insightface.app import FaceAnalysis
from insightface.model_zoo import get_model

# Import moviepy with fallback for different versions
try:
    from moviepy.editor import VideoFileClip, ImageSequenceClip
except ImportError:
    from moviepy import VideoFileClip, ImageSequenceClip

from tqdm import tqdm
print("✓ Imported")

# ============================================================================
# SECTION 1: FACE DETECTION (CPU MODE)
# ============================================================================
print("\n[3/7] Loading face detector...")
# Detection/recognition bundle restricted to the CPU execution provider.
face_app = FaceAnalysis(
    name="buffalo_l",
    providers=["CPUExecutionProvider"],
)
# ctx_id=-1 selects CPU; detection runs on a 640x640 input.
face_app.prepare(ctx_id=-1, det_size=(640, 640))
print("✓ Face detector loaded (CPU mode)")

# ============================================================================
# SECTION 2: INSWAPPER MODEL (CPU MODE)
# ============================================================================
print("\n[4/7] Loading INSwapper...")

swapper = None
SWAPPER_LOADED = False

try:
    model_path = 'inswapper_128.onnx'
    # Anything smaller than this is treated as a broken download and refetched.
    min_model_bytes = 100_000_000

    if not os.path.exists(model_path) or os.path.getsize(model_path) < min_model_bytes:
        print("  Downloading from HuggingFace...")
        import urllib.request
        url = "https://huggingface.co/CountFloyd/deepfake/resolve/main/inswapper_128.onnx"
        # Download to a side file and rename only on success, so an interrupted
        # transfer can never leave a truncated file at model_path that would
        # pass the size check on the next start.
        partial_path = model_path + '.part'
        try:
            urllib.request.urlretrieve(url, partial_path)
            if os.path.getsize(partial_path) < min_model_bytes:
                raise RuntimeError(
                    f"downloaded file is only {os.path.getsize(partial_path)} bytes"
                )
            os.replace(partial_path, model_path)
        finally:
            # Only present if the download or the size check failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print(f"  ✓ Downloaded ({os.path.getsize(model_path) // 1_000_000}MB)")

    swapper = get_model(model_path, download=False, download_zip=False, providers=['CPUExecutionProvider'])

    SWAPPER_LOADED = True
    print("✓ INSwapper loaded (CPU mode)")

except Exception as e:
    # Start-up boundary: keep the app running; the UI reports the swapper as
    # unavailable through SWAPPER_LOADED.
    print(f"✗ INSwapper failed: {e}")

# ============================================================================
# SECTION 3: CODEFORMER (SIMPLIFIED FOR CPU)
# ============================================================================
print("\n[5/7] Loading CodeFormer...")

codeformer_net = None
CODEFORMER_LOADED = False

try:
    # NOTE(review): RRDBNet, load_file_from_url, imwrite, img2tensor and
    # tensor2img are not referenced in the visible part of this file; they are
    # kept so the set of module-level names stays unchanged.
    from basicsr.archs.rrdbnet_arch import RRDBNet
    from basicsr.utils.download_util import load_file_from_url
    from basicsr.utils import imwrite, img2tensor, tensor2img
    from facexlib.utils.face_restoration_helper import FaceRestoreHelper
    import torch

    # Download CodeFormer model
    model_url = 'https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth'
    model_path = 'codeformer.pth'

    if not os.path.exists(model_path):
        print("  Downloading CodeFormer...")
        import urllib.request
        # Download to a side file and rename only on success: the existence
        # check above would otherwise accept a truncated file forever.
        partial_path = model_path + '.part'
        try:
            urllib.request.urlretrieve(model_url, partial_path)
            os.replace(partial_path, model_path)
        finally:
            # Only present if the download failed part-way.
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print("  ✓ Downloaded")

    # Load CodeFormer network
    from basicsr.archs import build_network

    # NOTE(review): presumably this needs a basicsr build that registers a
    # 'CodeFormer' architecture - confirm. If it does not, the exception is
    # caught below and CodeFormer is simply disabled.
    codeformer_net = build_network({
        'type': 'CodeFormer',
        'dim_embd': 512,
        'n_head': 8,
        'n_layers': 9,
        'connect_list': ['32', '64', '128', '256']
    })

    # SECURITY NOTE: torch.load unpickles the downloaded checkpoint; only point
    # model_url at a source you trust.
    checkpoint = torch.load(model_path, map_location='cpu')
    codeformer_net.load_state_dict(checkpoint['params_ema'])
    codeformer_net.eval()

    # Always use CPU for Spaces
    device = 'cpu'
    codeformer_net = codeformer_net.to(device)

    # Face helper for detection and alignment
    # NOTE(review): face_helper is not used by the functions visible in this
    # file - confirm whether it is still needed.
    face_helper = FaceRestoreHelper(
        upscale_factor=1,
        face_size=512,
        crop_ratio=(1, 1),
        det_model='retinaface_resnet50',
        save_ext='png',
        use_parse=True,
        device=device
    )

    CODEFORMER_LOADED = True
    print("✓ CodeFormer loaded (CPU mode)")

except Exception as e:
    # Start-up boundary: CodeFormer is optional, so any failure only disables it.
    print(f"⚠ CodeFormer failed: {e}")
    print("  Will use basic enhancement only")

# ============================================================================
# SECTION 4: GFPGAN (BACKUP/COMPLEMENTARY)
# ============================================================================
print("\n[6/7] Loading GFPGAN...")

gfpgan_restorer = None
GFPGAN_LOADED = False

try:
    from gfpgan import GFPGANer

    model_file = 'GFPGANv1.4.pth'

    if not os.path.exists(model_file):
        print("  Downloading GFPGAN...")
        import urllib.request
        # Download to a side file and rename only on success: the existence
        # check above would otherwise accept a truncated file forever.
        partial_file = model_file + '.part'
        try:
            urllib.request.urlretrieve(
                "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth",
                partial_file
            )
            os.replace(partial_file, model_file)
        finally:
            # Only present if the download failed part-way.
            if os.path.exists(partial_file):
                os.remove(partial_file)

    # upscale=2 is passed to GFPGANer here; callers that paste the enhanced
    # image back into a crop of the original size must account for it.
    gfpgan_restorer = GFPGANer(
        model_path=model_file,
        upscale=2,
        arch='clean',
        channel_multiplier=2,
        bg_upsampler=None,
        device='cpu'  # Force CPU
    )

    GFPGAN_LOADED = True
    print("✓ GFPGAN loaded (CPU mode)")

except Exception as e:
    # Start-up boundary: GFPGAN is optional, so any failure only disables it.
    print(f"⚠ GFPGAN unavailable: {e}")

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def resize_preview(image, max_width=400, max_height=300):
    """Shrink an image so it fits inside a ``max_width`` x ``max_height`` box.

    The aspect ratio is preserved and the image is never enlarged.

    Args:
        image: Image array whose ``shape`` starts with ``(height, width)``,
            or ``None``.
        max_width: Largest allowed preview width in pixels.
        max_height: Largest allowed preview height in pixels.

    Returns:
        ``None`` if ``image`` is ``None``; the same object if it already fits
        (or has a zero-sized dimension); otherwise a resized copy produced
        with ``cv2.INTER_AREA``.
    """
    if image is None:
        return None

    h, w = image.shape[:2]
    if h == 0 or w == 0:
        # Nothing to scale, and dividing by the size below would fail.
        return image

    scale = min(max_width / w, max_height / h, 1.0)
    if scale >= 1.0:
        return image

    # int() truncation can give 0 for very elongated images, which cv2.resize
    # rejects, so keep every side at least one pixel.
    new_w = max(1, int(w * scale))
    new_h = max(1, int(h * scale))
    return cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

def detect_faces_with_preview(image):
    """Detect faces and build a small annotated preview image.

    Two-dimensional input is expanded with ``COLOR_GRAY2BGR`` and 4-channel
    input is reduced with ``COLOR_RGBA2BGR`` before detection. Every detected
    face is outlined in green and labelled "Face N". The preview is shrunk
    with ``resize_preview`` and converted with ``COLOR_BGR2RGB`` on return.

    Returns:
        ``(preview, faces)``: ``(None, [])`` when ``image`` is ``None``,
        otherwise the preview image and the list returned by
        ``face_app.get`` (an empty list when nothing was detected).
    """
    if image is None:
        return None, []

    # Bring every input to three channels before running the detector.
    if len(image.shape) == 2:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] == 4:
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)

    detected = face_app.get(image)

    if not detected:
        # Nothing to annotate: preview the input as-is.
        return cv2.cvtColor(resize_preview(image), cv2.COLOR_BGR2RGB), []

    green = (0, 255, 0)
    annotated = image.copy()
    for number, face in enumerate(detected, start=1):
        left, top, right, bottom = face.bbox.astype(int)
        cv2.rectangle(annotated, (left, top), (right, bottom), green, 2)
        cv2.putText(
            annotated, f"Face {number}", (left, top - 10),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2
        )

    shrunk = resize_preview(annotated)
    return cv2.cvtColor(shrunk, cv2.COLOR_BGR2RGB), detected

# ============================================================================
# CODEFORMER RESTORATION FUNCTION
# ============================================================================

def restore_with_codeformer(face_img, fidelity_weight=0.2):
    """Run the CodeFormer network on a BGR face crop.

    The crop is resized to 512x512, converted to RGB, scaled to [-1, 1] and
    passed through ``codeformer_net``; the result is mapped back to a BGR
    ``uint8`` image.

    Args:
        face_img: BGR ``uint8`` image array (any size, must not be empty).
        fidelity_weight: Value forwarded to the network as ``w``.

    Returns:
        A contiguous 512x512x3 BGR ``uint8`` array.

    Raises:
        ValueError: If ``face_img`` is ``None`` or has no pixels.
    """
    if face_img is None or face_img.size == 0:
        raise ValueError("restore_with_codeformer received an empty face crop")

    import torch
    from torchvision.transforms import functional as F

    device = 'cpu'

    # Prepare image. The frames in this file are BGR (OpenCV order), while the
    # CodeFormer reference pipeline feeds RGB, so swap channels first.
    resized = cv2.resize(face_img, (512, 512), interpolation=cv2.INTER_LINEAR)
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    tensor = torch.from_numpy(rgb.astype(np.float32) / 255.0)
    tensor = tensor.permute(2, 0, 1).unsqueeze(0).to(device)

    # Normalize from [0, 1] to [-1, 1]
    tensor = F.normalize(tensor, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    # Run CodeFormer; the first element of the returned tuple is the image.
    with torch.no_grad():
        output = codeformer_net(tensor, w=fidelity_weight)[0]

    # Convert back from [-1, 1] CHW to [0, 255] HWC
    output = output.squeeze(0).permute(1, 2, 0).cpu().numpy()
    output = np.clip((output + 1) / 2 * 255, 0, 255).astype(np.uint8)

    # Back to BGR; make it contiguous so OpenCV can write into it later.
    return np.ascontiguousarray(output[:, :, ::-1])

# ============================================================================
# ENHANCED FACE SWAP
# ============================================================================

def _padded_face_box(bbox, frame_shape, include_hair):
    """Return a face bbox grown by the restoration padding, clipped to the frame."""
    x1, y1, x2, y2 = (int(v) for v in bbox)
    frame_h, frame_w = frame_shape[:2]
    # Head-swap mode uses a wider margin (60% of the larger side instead of 30%).
    pad = int(max(x2 - x1, y2 - y1) * (0.6 if include_hair else 0.3))
    return (
        max(0, x1 - pad),
        max(0, y1 - pad),
        min(frame_w, x2 + pad),
        min(frame_h, y2 + pad),
    )


def _gfpgan_enhance(region, weight):
    """Run GFPGAN on a crop and return the result at the crop's own size."""
    _, _, enhanced = gfpgan_restorer.enhance(
        region,
        has_aligned=False,
        paste_back=True,
        weight=weight
    )
    if enhanced is None:
        return region

    target_size = (region.shape[1], region.shape[0])
    if (enhanced.shape[1], enhanced.shape[0]) != target_size:
        # The restorer is created with an upscale factor, so the image it
        # returns can be larger than the crop; scale it back so it can be
        # pasted into the same rectangle.
        enhanced = cv2.resize(enhanced, target_size, interpolation=cv2.INTER_AREA)
    return enhanced


def swap_face_in_frame(frame, source_face, target_face_idx=None, include_hair=False):
    """Swap faces in one frame and optionally restore the result.

    Args:
        frame: Image array handed to ``face_app.get`` and ``swapper.get``
            (``process_video`` passes frames in BGR order).
        source_face: Detected face object whose identity is swapped in.
        target_face_idx: Index into the faces detected in ``frame``; ``None``
            swaps every detected face.
        include_hair: When true, a wider margin around each face is restored.

    Returns:
        The input ``frame`` unchanged when the swapper is not loaded, no face
        is detected, or ``target_face_idx`` is out of range; otherwise a new
        image with the swap applied and, when CodeFormer or GFPGAN is loaded,
        the padded area around every face detected in the swapped image
        restored. Restoration failures are printed and the swapped image is
        still returned.
    """
    if not SWAPPER_LOADED:
        return frame

    target_faces = face_app.get(frame)

    if not target_faces:
        return frame

    # Swap face(s)
    if target_face_idx is not None:
        if not 0 <= target_face_idx < len(target_faces):
            return frame
        result = swapper.get(frame, target_faces[target_face_idx], source_face, paste_back=True)
    else:
        result = frame.copy()
        for target_face in target_faces:
            result = swapper.get(result, target_face, source_face, paste_back=True)

    # Apply restoration: CodeFormer (optionally followed by GFPGAN) when it is
    # loaded, otherwise GFPGAN alone.
    use_codeformer = CODEFORMER_LOADED and codeformer_net is not None
    use_gfpgan = GFPGAN_LOADED and gfpgan_restorer is not None
    if not (use_codeformer or use_gfpgan):
        return result

    label = "CodeFormer" if use_codeformer else "GFPGAN"
    try:
        for face in face_app.get(result):
            x1, y1, x2, y2 = _padded_face_box(face.bbox, result.shape, include_hair)
            if x2 <= x1 or y2 <= y1:
                # Degenerate box after clipping: nothing to restore.
                continue

            face_region = result[y1:y2, x1:x2].copy()

            if use_codeformer:
                restored_face = restore_with_codeformer(face_region, fidelity_weight=0.2)
                restored_face = cv2.resize(
                    restored_face, (x2 - x1, y2 - y1), interpolation=cv2.INTER_LANCZOS4
                )
                if use_gfpgan:
                    # GFPGAN is a best-effort second pass; keep the CodeFormer
                    # result if it fails, but say so instead of hiding it.
                    try:
                        restored_face = _gfpgan_enhance(restored_face, weight=0.5)
                    except Exception as e:
                        print(f"  ⚠ GFPGAN error: {e}")
            else:
                restored_face = _gfpgan_enhance(face_region, weight=0.9)

            result[y1:y2, x1:x2] = restored_face

        print(f"  ✓ {label} applied")

    except Exception as e:
        print(f"  ⚠ {label} error: {e}")

    return result

# ============================================================================
# VIDEO PROCESSING
# ============================================================================

def process_video(video_path, source_face, target_face_index, include_hair, progress_fn):
    """Swap faces in every frame of a video and write the result to an MP4.

    Args:
        video_path: Path of the input video.
        source_face: Detected face object to swap in.
        target_face_index: Index of the face to replace in each frame, or
            ``None`` for all faces (forwarded to ``swap_face_in_frame``).
        include_hair: Forwarded to ``swap_face_in_frame``.
        progress_fn: Callable invoked as ``progress_fn(fraction, desc=...)``
            every third frame.

    Returns:
        Path of the generated ``.mp4`` in the system temp directory. The
        caller owns the file.

    Raises:
        ValueError: If the swapper is not loaded or the video has no frames.

    Note:
        All processed frames are collected in a list before encoding, so
        memory use grows with the length and resolution of the video.
    """
    if not SWAPPER_LOADED:
        raise ValueError("INSwapper not loaded!")

    clip = VideoFileClip(video_path)
    try:
        fps = clip.fps
        total_frames = int(clip.duration * fps)
        # Guard the progress fraction against a zero frame estimate.
        progress_total = max(total_frames, 1)

        print(f"\nProcessing: {total_frames} frames @ {fps}fps")
        if include_hair:
            print("HEAD SWAP MODE: Swapping face + hair + ears!")
        else:
            print("FACE SWAP MODE: Swapping face only")

        processed_frames = []

        for i, frame in enumerate(clip.iter_frames()):
            # moviepy yields RGB; the swap pipeline works on BGR.
            frame_bgr = frame[:, :, ::-1]

            swapped = swap_face_in_frame(
                frame_bgr,
                source_face,
                target_face_index,
                include_hair
            )

            processed_frames.append(swapped[:, :, ::-1])

            if i % 3 == 0:
                # The estimate can be lower than the real frame count, so cap
                # the fraction at 1.0.
                progress_fn(
                    min((i + 1) / progress_total, 1.0),
                    desc=f"Frame {i+1}/{total_frames}"
                )

        if not processed_frames:
            raise ValueError("Video contains no frames")

        output_clip = ImageSequenceClip(processed_frames, fps=fps)

        if clip.audio is not None:
            output_clip = output_clip.set_audio(clip.audio)

        # mkstemp creates the file atomically (mktemp is deprecated because
        # the name it returns can be taken by someone else before use).
        fd, output_path = tempfile.mkstemp(suffix='.mp4')
        os.close(fd)
        try:
            # The intermediate audio file lives in a scratch directory that is
            # removed whether or not encoding succeeds.
            with tempfile.TemporaryDirectory() as scratch_dir:
                output_clip.write_videofile(
                    output_path,
                    codec='libx264',
                    audio_codec='aac',
                    temp_audiofile=os.path.join(scratch_dir, 'audio.m4a'),
                    remove_temp=True
                )
        except Exception:
            # Do not leave a broken output file behind.
            if os.path.exists(output_path):
                os.remove(output_path)
            raise

        return output_path
    finally:
        # Release the reader even when processing or encoding fails.
        clip.close()

# ============================================================================
# GRADIO HANDLERS
# ============================================================================

# Module-level store shared by the Gradio handlers below: the faces detected
# in the uploaded source image, the faces detected in the first video frame,
# and the path of the uploaded video.
state = dict(
    source_faces=[],
    target_faces=[],
    video_path=None,
)

def handle_source_image(image):
    """Handle an uploaded (or cleared) source image.

    Detects faces in the image, stores them in ``state['source_faces']`` and
    fills the face selector.

    Args:
        image: Numpy image from the ``gr.Image(type="numpy")`` input, or
            ``None`` when the input was cleared.

    Returns:
        ``(preview, status_message, dropdown)`` for the three bound outputs.
    """
    # Forget faces from any earlier upload first, so a cleared or failed
    # upload can never be used for generation.
    state['source_faces'] = []

    if image is None:
        return None, "Upload source image", gr.Dropdown(choices=[])

    try:
        # Gradio delivers numpy images in RGB order, while the detection and
        # preview code works on BGR. Grayscale and RGBA inputs are converted
        # inside detect_faces_with_preview.
        if image.ndim == 3 and image.shape[2] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        preview, faces = detect_faces_with_preview(image)
        state['source_faces'] = faces

        if not faces:
            return preview, "❌ No faces detected", gr.Dropdown(choices=[])

        message = f"✓ Found {len(faces)} face(s)"
        choices = [f"Face {i+1}" for i in range(len(faces))]

        return preview, message, gr.Dropdown(
            choices=choices,
            value=choices[0],
            interactive=True
        )

    except Exception as e:
        # UI boundary: report the problem in the status box instead of crashing.
        return None, f"❌ Error: {e}", gr.Dropdown(choices=[])

def handle_target_video(video):
    """Handle an uploaded (or cleared) target video.

    Reads the first frame, detects faces in it, stores them together with the
    video path in ``state`` and fills the person selector.

    Args:
        video: Path of the uploaded video, or ``None`` when the input was
            cleared.

    Returns:
        ``(preview, status_message, dropdown)`` for the three bound outputs.
    """
    # Forget the previous video first, so stale faces are never combined with
    # a new, cleared or unreadable upload.
    state['target_faces'] = []
    state['video_path'] = None

    if video is None:
        return None, "Upload target video", gr.Dropdown(choices=[])

    try:
        clip = VideoFileClip(video)
        try:
            frame = clip.get_frame(0)
        finally:
            # Release the reader even if the first frame cannot be decoded.
            clip.close()

        state['video_path'] = video

        # moviepy frames are RGB; the detection code works on BGR.
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

        preview, faces = detect_faces_with_preview(frame_bgr)
        state['target_faces'] = faces

        if not faces:
            return preview, "❌ No faces in video", gr.Dropdown(choices=[])

        message = f"✓ Found {len(faces)} person(s)"
        choices = [f"Person {i+1}" for i in range(len(faces))]

        return preview, message, gr.Dropdown(
            choices=choices,
            value=choices[0],
            interactive=True
        )

    except Exception as e:
        # UI boundary: report the problem in the status box instead of crashing.
        return None, f"❌ Error: {e}", gr.Dropdown(choices=[])

def handle_generate(source_choice, target_choice, include_hair, progress=gr.Progress()):
    """Run the face swap for the current selection and report the outcome.

    Args:
        source_choice: Selected label of the source dropdown ("Face N").
        target_choice: Selected label of the target dropdown ("Person N").
        include_hair: Value of the head-swap checkbox.
        progress: Gradio progress tracker used for status updates.

    Returns:
        ``(video_path, status_text)``; ``video_path`` is ``None`` whenever the
        video could not be generated.
    """
    if not SWAPPER_LOADED:
        return None, "❌ INSwapper not loaded!"

    if not state['source_faces']:
        return None, "❌ Upload source image first"

    if not state['target_faces'] or not state['video_path']:
        return None, "❌ Upload target video first"

    # The dropdowns are empty (None) until a detection has filled them.
    if not source_choice or not target_choice:
        return None, "❌ Select a source face and a target person first"

    try:
        # Labels look like "Face 2" / "Person 1"; convert to 0-based indices.
        source_idx = int(source_choice.split()[1]) - 1
        target_idx = int(target_choice.split()[1]) - 1

        # A selection left over from an earlier upload may point past the
        # faces that are stored now.
        if not 0 <= source_idx < len(state['source_faces']):
            return None, "❌ Selected source face is no longer available"
        if not 0 <= target_idx < len(state['target_faces']):
            return None, "❌ Selected target person is no longer available"

        source_face = state['source_faces'][source_idx]

        progress(0, desc="Starting...")

        result = process_video(
            state['video_path'],
            source_face,
            target_idx,
            include_hair,
            progress
        )

        progress(1.0, desc="Complete!")

        status = "✅ DONE!\n\n"
        status += "Applied:\n"
        status += "✓ INSwapper face swap\n"
        if include_hair:
            status += "✓ HEAD SWAP (face + hair + ears)\n"
        else:
            status += "✓ FACE SWAP (face only)\n"

        if CODEFORMER_LOADED:
            status += "✓ CodeFormer restoration\n"
        elif GFPGAN_LOADED:
            status += "✓ GFPGAN restoration\n"

        return result, status

    except Exception as e:
        # UI boundary: show the failure, with the traceback for debugging, in
        # the status box.
        import traceback
        return None, f"❌ Error:\n{e}\n\n{traceback.format_exc()}"

# ============================================================================
# GRADIO UI
# ============================================================================

print("\n[7/7] Building interface...")

# Everything created inside this context is attached to `demo`; the order of
# the statements below defines the page layout.
with gr.Blocks(theme=gr.themes.Soft(), title="Ultimate Face Swap") as demo:
    
    gr.Markdown("# 🔥 ULTIMATE FACE SWAP + HEAD SWAP!")
    gr.Markdown("### Professional face swapping with enhancement")
    
    # Model status badges, decided once at build time from the load flags.
    if SWAPPER_LOADED:
        gr.Markdown("✅ **INSwapper Loaded**")
    else:
        gr.Markdown("❌ **INSwapper Failed**")
    
    # Only one restoration badge is shown; CodeFormer takes precedence.
    if CODEFORMER_LOADED:
        gr.Markdown("✅ **CodeFormer Active**")
    elif GFPGAN_LOADED:
        gr.Markdown("✅ **GFPGAN Active**")
    else:
        gr.Markdown("⚠️ **No restoration available**")
    
    # Two side-by-side columns: source image on the left, target video on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("### 📸 Source Image")
            source_image = gr.Image(type="numpy", label="Upload Source Face")
            source_preview = gr.Image(label="Detected", height=300)
            source_status = gr.Textbox(label="Status", lines=2)
            source_dropdown = gr.Dropdown(label="Select Face")
        
        with gr.Column():
            gr.Markdown("### 🎬 Target Video")
            target_video = gr.Video(label="Upload Target Video")
            target_preview = gr.Image(label="Detected", height=300)
            target_status = gr.Textbox(label="Status", lines=2)
            target_dropdown = gr.Dropdown(label="Select Person")
    
    gr.Markdown("### 🚀 Generate Video")
    
    # Passed to handle_generate as `include_hair`.
    head_swap_checkbox = gr.Checkbox(
        value=False,
        label="🔥 HEAD SWAP MODE (includes hair, ears, neck!)"
    )
    
    generate_button = gr.Button(
        "🎭 Generate Video!",
        variant="primary",
        size="lg"
    )
    
    generation_status = gr.Textbox(label="Status", lines=6)
    result_video = gr.Video(label="Result")
    
    # Events
    # Uploading or clearing the source image re-runs face detection.
    source_image.change(
        handle_source_image,
        inputs=[source_image],
        outputs=[source_preview, source_status, source_dropdown]
    )
    
    # Uploading or clearing the target video re-runs detection on its first frame.
    target_video.change(
        handle_target_video,
        inputs=[target_video],
        outputs=[target_preview, target_status, target_dropdown]
    )
    
    # The handler reads the uploaded files from the module-level `state`,
    # so only the selections and the checkbox are passed as inputs.
    generate_button.click(
        handle_generate,
        inputs=[source_dropdown, target_dropdown, head_swap_checkbox],
        outputs=[result_video, generation_status]
    )

print("✓ Interface built")

print("\n" + "="*80)
print("LAUNCHING!")
print("="*80)

# The queue is enabled before launch; handle_generate reports progress
# through gr.Progress.
demo.queue()
demo.launch()  # Removed share=True for Spaces

# NOTE(review): if launch() blocks in this environment, these lines only run
# after the server stops - confirm.
print("\n✅ Running!")
print("="*80)