# Source: Hugging Face Spaces - dreamlessx / LandmarkDiff (Space status: Sleeping)
# File size: 14,689 Bytes - revision 83b71db

"""Post-processing: CodeFormer/GFPGAN face restore, Real-ESRGAN bg,
Laplacian blend, sharpening, histogram matching, ArcFace identity gate.
"""

from __future__ import annotations

import logging

import cv2
import numpy as np


def laplacian_pyramid_blend(
    source: np.ndarray,
    target: np.ndarray,
    mask: np.ndarray,
    levels: int = 6,
) -> np.ndarray:
    """Laplacian pyramid blend - kills the 'pasted on' look from alpha blending.

    Args:
        source: Image to blend in where the mask is high. Resized to the
            target's height/width when the two differ.
        target: Image kept where the mask is low; defines the output size.
        mask: Blend weights, 2-D or 3-D. Values are divided by 255 when the
            maximum exceeds 1.0. A 2-D (or single-channel 3-D) mask is
            expanded to three channels, so 3-channel images are assumed.
        levels: Number of pyramid reductions.

    Returns:
        uint8 image with the target's height and width.
    """
    h, w = target.shape[:2]
    if source.shape[:2] != (h, w):
        source = cv2.resize(source, (w, h))

    # Normalize mask to float32 in [0, 1]
    mask_f = mask.astype(np.float32)
    if mask_f.size and mask_f.max() > 1.0:
        mask_f = mask_f / 255.0
    # OpenCV drops a trailing singleton channel in resize/pyrDown, which
    # would break the per-level broadcast below, so flatten it up front.
    if mask_f.ndim == 3 and mask_f.shape[2] == 1:
        mask_f = mask_f[:, :, 0]
    if mask_f.shape[:2] != (h, w):
        mask_f = cv2.resize(mask_f, (w, h))
    mask_3ch = np.stack([mask_f] * 3, axis=-1) if mask_f.ndim == 2 else mask_f

    # Make dimensions divisible by 2^levels by PADDING on the bottom/right.
    # Resizing here and cropping afterwards would return a stretched,
    # cropped (i.e. misaligned) image; padding keeps every original pixel
    # at its own coordinates so the final crop is exact.
    factor = 2 ** levels
    pad_h = -h % factor
    pad_w = -w % factor

    def _pad(arr: np.ndarray) -> np.ndarray:
        widths = ((0, pad_h), (0, pad_w)) + ((0, 0),) * (arr.ndim - 2)
        return np.pad(arr, widths, mode="edge")

    if pad_h or pad_w:
        source = _pad(source)
        target = _pad(target)
        mask_3ch = _pad(mask_3ch)

    src_f = source.astype(np.float32)
    tgt_f = target.astype(np.float32)

    # Gaussian pyramid of the mask: one weight map per pyramid level
    mask_pyr = [mask_3ch]
    for _ in range(levels):
        mask_pyr.append(cv2.pyrDown(mask_pyr[-1]))

    # Laplacian pyramids for source and target
    src_lap = _build_laplacian_pyramid(src_f, levels)
    tgt_lap = _build_laplacian_pyramid(tgt_f, levels)

    # Blend each level using the mask at that resolution
    blended_lap = []
    for sl, tl, ml in zip(src_lap, tgt_lap, mask_pyr):
        # Resize mask to match level shape if needed
        if ml.shape[:2] != sl.shape[:2]:
            ml = cv2.resize(ml, (sl.shape[1], sl.shape[0]))
        blended_lap.append(sl * ml + tl * (1.0 - ml))

    # Reconstruct from blended Laplacian, then drop the padding
    result = _reconstruct_from_laplacian(blended_lap)
    result = result[:h, :w]

    # Round before casting: a bare astype(np.uint8) truncates, so float
    # round-off such as 99.9999 would come out as 99 even in regions that
    # should reproduce the target exactly.
    return np.clip(np.rint(result), 0, 255).astype(np.uint8)


def _build_laplacian_pyramid(
    image: np.ndarray,
    levels: int,
) -> list[np.ndarray]:
    """Build Laplacian pyramid from an image."""
    gaussian = [image.copy()]
    for _ in range(levels):
        gaussian.append(cv2.pyrDown(gaussian[-1]))

    laplacian = []
    for i in range(levels):
        upsampled = cv2.pyrUp(gaussian[i + 1])
        # Match sizes (pyrUp can add a pixel)
        gh, gw = gaussian[i].shape[:2]
        upsampled = upsampled[:gh, :gw]
        laplacian.append(gaussian[i] - upsampled)

    laplacian.append(gaussian[-1])  # coarsest level
    return laplacian


def _reconstruct_from_laplacian(pyramid: list[np.ndarray]) -> np.ndarray:
    """Reconstruct image from Laplacian pyramid."""
    image = pyramid[-1].copy()
    for i in range(len(pyramid) - 2, -1, -1):
        image = cv2.pyrUp(image)
        lh, lw = pyramid[i].shape[:2]
        image = image[:lh, :lw]
        image = image + pyramid[i]
    return image


def frequency_aware_sharpen(
    image: np.ndarray,
    strength: float = 0.3,
    radius: int = 3,
) -> np.ndarray:
    """Unsharp mask on LAB luminance only - sharpens skin texture without color fringe.

    Args:
        image: BGR image. The LAB result is cast back to uint8, so an 8-bit
            input is assumed.
        strength: Multiplier on the (luminance - blurred) detail that is
            added back to the luminance.
        radius: Blur radius; the Gaussian kernel is ``2 * radius + 1`` wide.

    Returns:
        Sharpened BGR image as uint8.
    """
    lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB).astype(np.float32)
    luminance = lab[:, :, 0]

    # Unsharp mask on luminance only; the a/b channels pass through untouched.
    ksize = radius * 2 + 1
    blurred = cv2.GaussianBlur(luminance, (ksize, ksize), 0)
    sharpened = luminance + strength * (luminance - blurred)

    # Round before the integer cast below: astype(np.uint8) truncates, which
    # would bias the sharpened luminance downwards.
    lab[:, :, 0] = np.clip(np.rint(sharpened), 0, 255)
    return cv2.cvtColor(lab.astype(np.uint8), cv2.COLOR_LAB2BGR)


def restore_face_gfpgan(
    image: np.ndarray,
    upscale: int = 1,
) -> np.ndarray:
    """GFPGAN face restore. Returns original if not installed.

    Args:
        image: Image handed to ``GFPGANer.enhance``.
        upscale: Upscale factor passed to ``GFPGANer``.

    Returns:
        The restored image, or the *same* ``image`` object when GFPGAN is
        not installed, raises, or yields ``None``. Returning the identical
        object lets a caller detect the fallback with an ``is`` check.
    """
    try:
        from gfpgan import GFPGANer
    except ImportError:
        return image

    try:
        restorer = GFPGANer(
            model_path="https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.3.pth",
            upscale=upscale,
            arch="clean",
            channel_multiplier=2,
            bg_upsampler=None,
        )
        _, _, restored = restorer.enhance(
            image,
            has_aligned=False,
            only_center_face=True,
            paste_back=True,
        )
    except Exception:
        # Best-effort step: keep the pipeline running, but leave a trace
        # instead of silently discarding the failure.
        logging.getLogger(__name__).warning(
            "GFPGAN face restoration failed; returning the input image",
            exc_info=True,
        )
        return image

    return image if restored is None else restored


def restore_face_codeformer(
    image: np.ndarray,
    fidelity: float = 0.7,
    upscale: int = 1,
) -> np.ndarray:
    """CodeFormer face restore. fidelity: 0=quality, 1=identity. Returns original if not installed.

    Args:
        image: Image handed to ``FaceRestoreHelper.read_image``.
        fidelity: Passed to the network as ``w``.
        upscale: Upscale factor passed to ``FaceRestoreHelper``.

    Returns:
        The image with restored faces pasted back, or the *same* ``image``
        object when the dependencies are missing, any step raises, or the
        helper yields ``None``. Returning the identical object lets a caller
        detect the fallback with an ``is`` check.
    """
    try:
        from codeformer.basicsr.utils import img2tensor, tensor2img
        from codeformer.facelib.utils.face_restoration_helper import FaceRestoreHelper
        from codeformer.basicsr.utils.download_util import load_file_from_url
        import torch
        from torchvision.transforms.functional import normalize as tv_normalize
    except ImportError:
        return image

    try:
        # NOTE(review): this import is unused; it looks like an availability
        # probe for the full package - confirm before removing.
        from codeformer.inference_codeformer import set_realesrgan as _unused  # noqa: F401
        from codeformer.basicsr.archs.codeformer_arch import CodeFormer as CodeFormerArch

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        model = CodeFormerArch(
            dim_embd=512, codebook_size=1024, n_head=8, n_layers=9,
            connect_list=["32", "64", "128", "256"],
        ).to(device)

        ckpt_path = load_file_from_url(
            url="https://github.com/sczhou/CodeFormer/releases/download/v0.1.0/codeformer.pth",
            model_dir="weights/CodeFormer",
            progress=True,
        )
        # SECURITY: weights_only=False unpickles arbitrary objects from a
        # downloaded file. Only acceptable while the URL above is trusted;
        # consider weights_only=True if the checkpoint loads with it.
        checkpoint = torch.load(ckpt_path, map_location=device, weights_only=False)
        model.load_state_dict(checkpoint["params_ema"])
        model.eval()

        face_helper = FaceRestoreHelper(
            upscale,
            face_size=512,
            crop_ratio=(1, 1),
            det_model="retinaface_resnet50",
            save_ext="png",
            device=device,
        )
        face_helper.read_image(image)
        face_helper.get_face_landmarks_5(only_center_face=True)
        face_helper.align_warp_face()

        for cropped_face in face_helper.cropped_faces:
            # Scale to [0, 1], then normalize to [-1, 1] to match the
            # min_max=(-1, 1) used when converting the output back.
            face_t = img2tensor(cropped_face / 255.0, bgr2rgb=True, float32=True)
            tv_normalize(face_t, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True)
            face_t = face_t.unsqueeze(0).to(device)

            with torch.no_grad():
                output = model(face_t, w=fidelity, adain=True)[0]
                restored = tensor2img(output, rgb2bgr=True, min_max=(-1, 1))
            restored = restored.astype(np.uint8)
            face_helper.add_restored_face(restored)

        face_helper.get_inverse_affine(None)
        restored_img = face_helper.paste_faces_to_image()
    except Exception:
        # Best-effort step: keep the pipeline running, but leave a trace
        # instead of silently discarding the failure.
        logging.getLogger(__name__).warning(
            "CodeFormer face restoration failed; returning the input image",
            exc_info=True,
        )
        return image

    return image if restored_img is None else restored_img


def enhance_background_realesrgan(
    image: np.ndarray,
    mask: np.ndarray,
    outscale: int = 2,
) -> np.ndarray:
    """Real-ESRGAN on background only (outside mask). Returns original if not installed.

    Args:
        image: Image handed to ``RealESRGANer.enhance``.
        mask: Weights for keeping the original pixels, 2-D or 3-D. Values
            are divided by 255 when the maximum exceeds 1.0.
        outscale: Output scale passed to ``RealESRGANer.enhance``; the
            enhanced image is resized back to the input size afterwards.

    Returns:
        uint8 mix of the original (inside the mask) and the enhanced image
        (outside it), or the same ``image`` object when the dependencies
        are missing or any step raises.
    """
    try:
        from realesrgan import RealESRGANer
        from basicsr.archs.rrdbnet_arch import RRDBNet
        import torch
    except ImportError:
        return image

    try:
        model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
        upsampler = RealESRGANer(
            scale=4,
            model_path="https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth",
            model=model,
            tile=400,
            tile_pad=10,
            pre_pad=0,
            half=torch.cuda.is_available(),
        )
        enhanced, _ = upsampler.enhance(image, outscale=outscale)

        # Downscale back to original size
        h, w = image.shape[:2]
        enhanced = cv2.resize(enhanced, (w, h), interpolation=cv2.INTER_LANCZOS4)

        # Only apply enhancement to background (outside mask)
        mask_f = mask.astype(np.float32)
        if mask_f.max() > 1.0:
            mask_f /= 255.0
        if mask_f.ndim == 2:
            mask_3ch = np.stack([mask_f] * 3, axis=-1)
        else:
            mask_3ch = mask_f

        # Keep face region from original, use enhanced for background
        blended = (
            image.astype(np.float32) * mask_3ch
            + enhanced.astype(np.float32) * (1.0 - mask_3ch)
        )
        # Round and clip before the cast: astype(np.uint8) truncates and
        # wraps out-of-range values.
        return np.clip(np.rint(blended), 0, 255).astype(np.uint8)
    except Exception:
        # Best-effort step: keep the pipeline running, but leave a trace
        # instead of silently discarding the failure.
        logging.getLogger(__name__).warning(
            "Real-ESRGAN background enhancement failed; returning the input image",
            exc_info=True,
        )

    return image


def verify_identity_arcface(
    original: np.ndarray,
    result: np.ndarray,
    threshold: float = 0.6,
) -> dict:
    """Compare the first detected face of two images by embedding cosine similarity.

    Returns a dict with keys ``similarity``, ``passed`` and ``message``.
    When InsightFace is missing, a face is not found in either image, or
    anything raises, the check does not block: ``similarity`` is -1.0 and
    ``passed`` is True, with the reason in ``message``.
    """

    def _not_checked(message: str) -> dict:
        # Shared shape for every "could not run the check" outcome.
        return {"similarity": -1.0, "passed": True, "message": message}

    try:
        from insightface.app import FaceAnalysis
    except ImportError:
        return _not_checked("InsightFace not installed - identity check skipped")

    try:
        analyzer = FaceAnalysis(
            name="buffalo_l",
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        device_id = 0 if _has_cuda() else -1
        analyzer.prepare(ctx_id=device_id, det_size=(320, 320))

        faces_before = analyzer.get(original)
        faces_after = analyzer.get(result)
        if not (faces_before and faces_after):
            return _not_checked(
                "Could not detect face in one/both images - check skipped"
            )

        emb_before = faces_before[0].embedding
        emb_after = faces_after[0].embedding

        # Cosine similarity; the epsilon guards against a zero-norm embedding.
        norm_product = np.linalg.norm(emb_before) * np.linalg.norm(emb_after) + 1e-8
        score = float(np.dot(emb_before, emb_after) / norm_product)
        score = float(np.clip(score, 0, 1))

        ok = score >= threshold
        verdict = (
            f"Identity preserved (similarity={score:.3f})"
            if ok
            else f"WARNING: Identity drift detected (similarity={score:.3f} < {threshold})"
        )
        return {"similarity": score, "passed": ok, "message": verdict}
    except Exception as exc:
        return _not_checked(f"Identity check failed: {exc}")


def _has_cuda() -> bool:
    try:
        import torch
        return torch.cuda.is_available()
    except ImportError:
        return False


def histogram_match_skin(
    source: np.ndarray,
    reference: np.ndarray,
    mask: np.ndarray,
) -> np.ndarray:
    """CDF-based histogram matching in LAB space. Better than mean/std for skin.

    Inside the masked region, each LAB channel of ``source`` is remapped so
    its sorted values line up with the sorted values of ``reference`` taken
    from the same region. Pixels outside the mask keep their LAB values.

    Args:
        source: BGR image to adjust. The LAB result is cast to uint8, so an
            8-bit input is assumed.
        reference: BGR image providing the target distribution; resized to
            the source's height/width when the two differ.
        mask: Region selector, 2-D or 3-D, any numeric or bool dtype. Values
            are divided by 255 when the maximum exceeds 1.0, and pixels
            above 0.3 are selected. A 3-D mask is reduced with a
            per-pixel channel maximum.

    Returns:
        The matched BGR image, or the same ``source`` object when the mask
        selects nothing.
    """
    if mask.size == 0:
        return source

    # Normalise the mask the same way the other functions in this module do,
    # rather than switching on the exact dtype: a float64 or bool mask would
    # otherwise be compared against 76 and select nothing.
    mask_f = mask.astype(np.float32)
    if mask_f.ndim == 3:
        mask_f = mask_f.max(axis=2)
    if mask_f.max() > 1.0:
        mask_f = mask_f / 255.0

    h, w = source.shape[:2]
    if mask_f.shape != (h, w):
        mask_f = cv2.resize(mask_f, (w, h))
    mask_bool = mask_f > 0.3

    if not np.any(mask_bool):
        return source

    if reference.shape[:2] != (h, w):
        reference = cv2.resize(reference, (w, h))

    src_lab = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
    ref_lab = cv2.cvtColor(reference, cv2.COLOR_BGR2LAB).astype(np.float32)

    for ch in range(3):
        src_channel = src_lab[:, :, ch]  # view: writes land in src_lab
        src_vals = src_channel[mask_bool]
        ref_vals = ref_lab[:, :, ch][mask_bool]

        # CDF matching: sorted values act as empirical quantile functions
        src_sorted = np.sort(src_vals)
        ref_sorted = np.sort(ref_vals)

        # Sample the reference quantiles at the source's quantile positions
        src_cdf = np.linspace(0, 1, len(src_sorted))
        ref_cdf = np.linspace(0, 1, len(ref_sorted))
        mapping = np.interp(src_cdf, ref_cdf, ref_sorted)

        # Look up only the masked pixels; nothing else is modified
        src_channel[mask_bool] = np.interp(src_vals, src_sorted, mapping)

    # Round before casting: astype(np.uint8) truncates.
    result_lab = np.clip(np.rint(src_lab), 0, 255).astype(np.uint8)
    return cv2.cvtColor(result_lab, cv2.COLOR_LAB2BGR)


def full_postprocess(
    generated: np.ndarray,
    original: np.ndarray,
    mask: np.ndarray,
    restore_mode: str = "codeformer",
    codeformer_fidelity: float = 0.7,
    use_realesrgan: bool = True,
    use_laplacian_blend: bool = True,
    sharpen_strength: float = 0.25,
    verify_identity: bool = True,
    identity_threshold: float = 0.6,
) -> dict:
    """Full pipeline: restore -> bg enhance -> histogram match -> sharpen -> blend -> identity check.

    Args:
        generated: Generated image to post-process (copied, not modified).
        original: Image the result is colour-matched against and blended into.
        mask: Region mask shared by the enhancement, matching and blend steps.
        restore_mode: ``"codeformer"`` (falls back to GFPGAN), ``"gfpgan"``,
            or any other value to skip face restoration.
        codeformer_fidelity: Fidelity passed to the CodeFormer restore.
        use_realesrgan: Run the Real-ESRGAN background step.
        use_laplacian_blend: Use the Laplacian pyramid blend; otherwise a
            plain alpha blend with the mask.
        sharpen_strength: Sharpening strength; values <= 0 skip the step.
        verify_identity: Run the ArcFace identity check on the composite.
        identity_threshold: Similarity threshold for the identity check.

    Returns:
        Dict with ``image`` (the composite), ``identity_check`` (dict with
        ``similarity``/``passed``/``message``) and ``restore_used``
        (``"codeformer"``, ``"gfpgan"`` or ``"none"``).
    """
    result = generated.copy()
    restore_used = "none"

    # Step 1: Neural face restoration (CodeFormer > GFPGAN > skip)
    if restore_mode == "codeformer":
        attempts = (
            ("codeformer", lambda img: restore_face_codeformer(img, fidelity=codeformer_fidelity)),
            ("gfpgan", restore_face_gfpgan),
        )
    elif restore_mode == "gfpgan":
        attempts = (("gfpgan", restore_face_gfpgan),)
    else:
        attempts = ()

    for name, restore in attempts:
        restored = restore(result)
        # The restorers hand back the very object they were given when they
        # could not run, so compare against what was passed in. (Comparing
        # against `generated` is always "different" because `result` starts
        # as a copy, which would report a restorer that never ran.)
        if restored is not result:
            result = restored
            restore_used = name
            break

    # Step 2: Neural background enhancement
    if use_realesrgan:
        result = enhance_background_realesrgan(result, mask)

    # Step 3: Skin tone histogram matching (classical)
    result = histogram_match_skin(result, original, mask)

    # Step 4: Sharpen texture (classical)
    if sharpen_strength > 0:
        result = frequency_aware_sharpen(result, strength=sharpen_strength)

    # Step 5: Blend into original (classical)
    if use_laplacian_blend:
        composited = laplacian_pyramid_blend(result, original, mask)
    else:
        mask_f = mask.astype(np.float32)
        if mask_f.max() > 1.0:
            mask_f /= 255.0
        if mask_f.ndim == 2:
            mask_3ch = np.stack([mask_f] * 3, axis=-1)
        else:
            mask_3ch = mask_f
        blended = (
            result.astype(np.float32) * mask_3ch
            + original.astype(np.float32) * (1.0 - mask_3ch)
        )
        # Round and clip before the cast: astype(np.uint8) truncates and
        # wraps out-of-range values.
        composited = np.clip(np.rint(blended), 0, 255).astype(np.uint8)

    # Step 6: Neural identity verification
    identity_check = {"similarity": -1.0, "passed": True, "message": "skipped"}
    if verify_identity:
        identity_check = verify_identity_arcface(
            original, composited, threshold=identity_threshold,
        )

    return {
        "image": composited,
        "identity_check": identity_check,
        "restore_used": restore_used,
    }