Spaces:

liamu
/

DeepFakes-Detection

Running

File size: 9,117 Bytes

3a24a03

import sys
from pathlib import Path

# 1. Inject the CLIP_Surgery path BEFORE any local or external import.
# ROOT_DIR is the parent of the directory that contains this file.
ROOT_DIR = Path(__file__).resolve().parent.parent
CLIP_SURGERY_PATH = str(ROOT_DIR / "CLIP_Surgery")

# Put the directory at the front of sys.path (only once) so it takes priority
# during module lookup — presumably so that the `clip` import below resolves
# to the package inside CLIP_Surgery; confirm against the repository layout.
if CLIP_SURGERY_PATH not in sys.path:
    sys.path.insert(0, CLIP_SURGERY_PATH)

import clip as clip_surgery
import cv2
import mediapipe.python.solutions.face_mesh as mp_face_mesh
import numpy as np
import torch
from PIL import Image
from segmenter import FaceSegmenter
from huggingface_hub import hf_hub_download

from torchvision import transforms
from torchvision.transforms import InterpolationMode

from config import (
    CLIP_MEAN,
    CLIP_STD,
    DEVICE,
    FAKE_PROMPT_KEYWORDS,
    REAL_PROMPTS,
    SURGERY_PROMPTS,
    SURGERY_RES,
)

# ════════════════════════════════════════════════════════════
# MEDIAPIPE — Facial mask + Regions
# ════════════════════════════════════════════════════════════

# Cached MediaPipe FaceMesh instance; created by get_face_mesh() on first call.
_face_mesh = None

# Cached FaceSegmenter instance; created by get_segmenter() on first call.
_segmenter_instance = None

def get_segmenter():
    """Return the shared BiSeNet ``FaceSegmenter``, building it on first use.

    The first call downloads the weights file ``79999_iter.pth`` from the
    ``liamu/Deepfake-Pesos`` Hugging Face repository, instantiates the
    segmenter on ``DEVICE`` and caches it in the module-level
    ``_segmenter_instance``. Later calls return the cached object directly.
    """
    global _segmenter_instance

    # Fast path: the segmenter was already created by an earlier call.
    if _segmenter_instance is not None:
        return _segmenter_instance

    print(" A instanciar BiSeNet FaceSegmenter...")
    weights_file = hf_hub_download(
        repo_id="liamu/Deepfake-Pesos",
        filename="79999_iter.pth",
    )
    _segmenter_instance = FaceSegmenter(model_path=weights_file, device=DEVICE)
    print(" BiSeNet pronto.")
    return _segmenter_instance

def get_region_masks(img_rgb: np.ndarray) -> dict:
    """Return the region masks the shared segmenter produces for ``img_rgb``.

    Delegates to ``get_masks`` on the lazily created segmenter instance.
    """
    segmenter = get_segmenter()
    return segmenter.get_masks(img_rgb)


def get_face_mesh():
    """Return the shared MediaPipe ``FaceMesh``, building it on first use.

    The instance is configured for static images, at most one face, refined
    landmarks and a minimum detection confidence of 0.5, and is cached in the
    module-level ``_face_mesh``.
    """
    global _face_mesh

    if _face_mesh is not None:
        return _face_mesh

    # FaceMesh is taken from the directly imported solutions module.
    mesh_options = {
        "static_image_mode": True,
        "max_num_faces": 1,
        "refine_landmarks": True,
        "min_detection_confidence": 0.5,
    }
    _face_mesh = mp_face_mesh.FaceMesh(**mesh_options)
    return _face_mesh


def build_face_mask(img_rgb: np.ndarray) -> np.ndarray:
    """Build a soft mask of the face region from MediaPipe landmarks.

    The landmarks of the first detected face are converted to pixel
    coordinates and wrapped in a convex hull. Hull vertices lying in the top
    35% of the hull's height are moved up by 10% of that height (clamped at
    row 0) so the forehead is included. The filled outline is blurred with a
    31x31 Gaussian kernel and divided by its maximum.

    Args:
        img_rgb: Image array whose first two dimensions are (height, width);
            it is passed unchanged to ``FaceMesh.process``.

    Returns:
        A float32 array of shape (height, width). When no face is detected
        the mask is all ones.
    """
    height, width = img_rgb.shape[:2]
    detection = get_face_mesh().process(img_rgb)

    faces = detection.multi_face_landmarks
    if not faces:
        # Fallback: treat the whole image as face.
        return np.ones((height, width), dtype=np.float32)

    pixel_coords = [
        (int(point.x * width), int(point.y * height))
        for point in faces[0].landmark
    ]
    hull = cv2.convexHull(np.array(pixel_coords, dtype=np.int32))

    hull_rows = hull[:, 0, 1]
    top_row = hull_rows.min()
    face_height = hull_rows.max() - top_row

    # Lift the upper band of the outline by 10% of the face height.
    lift = int(face_height * 0.10)
    in_upper_band = hull_rows < (top_row + face_height * 0.35)
    raised_hull = hull.copy()
    raised_hull[in_upper_band, 0, 1] = np.maximum(
        0, hull_rows[in_upper_band] - lift
    )

    # NOTE(review): after the lift the outline may not be strictly convex;
    # OpenCV documents fillConvexPoly as also accepting monotone contours.
    binary_mask = np.zeros((height, width), dtype=np.uint8)
    cv2.fillConvexPoly(binary_mask, raised_hull, 1)

    # Feather the edge, then rescale so the peak is (approximately) 1.
    soft_mask = cv2.GaussianBlur(binary_mask.astype(np.float32), (31, 31), 0)
    return soft_mask / (soft_mask.max() + 1e-8)


# ════════════════════════════════════════════════════════════
# CLIP SURGERY — Visual heatmaps
# ════════════════════════════════════════════════════════════

# Cached CLIP Surgery model and its preprocessing pipeline; both are created
# together by get_surgery_model() on first call.
_surgery_model = None
_surgery_preprocess = None


def get_surgery_model():
    """Return the shared CLIP Surgery model and its preprocessing transform.

    On the first call the ``CS-ViT-L/14`` model is loaded on ``DEVICE`` and
    switched to eval mode, and a torchvision pipeline is built: bicubic
    resize to ``SURGERY_RES`` x ``SURGERY_RES``, tensor conversion, and
    normalisation with ``CLIP_MEAN`` / ``CLIP_STD``. Both objects are cached
    in module-level globals and reused by later calls.

    Returns:
        A ``(model, preprocess)`` tuple.
    """
    global _surgery_model, _surgery_preprocess

    if _surgery_model is not None:
        return _surgery_model, _surgery_preprocess

    print(" A carregar CLIP Surgery CS-ViT-L/14...")
    # The preprocess returned by load() is discarded in favour of the
    # pipeline assembled below.
    _surgery_model, _bundled_preprocess = clip_surgery.load(
        "CS-ViT-L/14", device=DEVICE
    )
    _surgery_model.eval()

    resize_step = transforms.Resize(
        (SURGERY_RES, SURGERY_RES),
        interpolation=InterpolationMode.BICUBIC,
    )
    to_tensor_step = transforms.ToTensor()
    normalize_step = transforms.Normalize(CLIP_MEAN, CLIP_STD)
    _surgery_preprocess = transforms.Compose(
        [resize_step, to_tensor_step, normalize_step]
    )
    print(" CLIP Surgery pronto.")
    return _surgery_model, _surgery_preprocess


def generate_heatmap(img_rgb, method: str = ""):
    """Generate CLIP Surgery heatmaps and a contrastive manipulation map.

    One similarity map is produced per prompt in ``SURGERY_PROMPTS``. Each
    map is min-max scaled, multiplied by the face mask and min-max scaled
    again. Prompts containing any ``FAKE_PROMPT_KEYWORDS`` entry
    (case-insensitive) count as manipulation prompts; otherwise prompts
    listed in ``REAL_PROMPTS`` count as real prompts.

    The contrastive map is ``mean(manipulation maps) - mean(real maps)`` with
    negative values clipped to zero, so only areas where manipulation
    outweighs real stay active.

    Args:
        img_rgb: Image array whose first two dimensions are (height, width).
        method: Not used by this function.

    Returns:
        A tuple ``(contrastive, per_text, scores, prompts, top_heatmap)``:
          contrastive — clipped difference map, divided by its maximum when
                        that exceeds 1e-8, then multiplied by the face mask
          per_text    — dict {prompt: float32 normalised heatmap}
          scores      — dict {prompt: mean heatmap value where the face mask
                        exceeds 0.5, or 0.0 if there is no such pixel}
          prompts     — the prompt list that was used
          top_heatmap — heatmap of the highest-scoring manipulation prompt
                        (highest-scoring prompt overall if none matches)
    """
    prompts = SURGERY_PROMPTS

    model, preprocess = get_surgery_model()
    height, width = img_rgb.shape[:2]
    batch = preprocess(Image.fromarray(img_rgb)).unsqueeze(0).to(DEVICE)

    with torch.no_grad():
        image_features = model.encode_image(batch)
        image_features = image_features / image_features.norm(
            dim=-1, keepdim=True
        )
        # One text feature per prompt.
        text_features = clip_surgery.encode_text_with_prompt_ensemble(
            model, prompts, DEVICE
        )
        # Per-token, per-prompt similarity.
        token_similarity = clip_surgery.clip_feature_surgery(
            image_features, text_features
        )
        # Drop the first token (the class token, per the original note)
        # before building image-sized maps.
        similarity_maps = clip_surgery.get_similarity_map(
            token_similarity[:, 1:, :], (height, width)
        )

    face_mask = build_face_mask(img_rgb)
    inside_face = face_mask > 0.5
    has_face_pixels = inside_face.any()
    maps_np = similarity_maps[0].cpu().numpy()

    def rescale(values):
        # Min-max scaling; the epsilon guards against a constant map.
        lowest = values.min()
        return (values - lowest) / (values.max() - lowest + 1e-8)

    def mentions_manipulation(prompt_text):
        lowered = prompt_text.lower()
        return any(kw.lower() in lowered for kw in FAKE_PROMPT_KEYWORDS)

    def average(maps):
        if not maps:
            return np.zeros((height, width), dtype=np.float32)
        return np.mean(maps, axis=0).astype(np.float32)

    per_text = {}
    scores = {}
    fake_maps = []
    real_maps = []

    for idx, prompt in enumerate(prompts):
        # Scale, restrict to the face, then scale again after masking.
        face_map = rescale(rescale(maps_np[:, :, idx]) * face_mask)
        per_text[prompt] = face_map.astype(np.float32)
        if has_face_pixels:
            scores[prompt] = float(face_map[inside_face].mean())
        else:
            scores[prompt] = 0.0

        # Manipulation keywords take precedence over the real-prompt list.
        if mentions_manipulation(prompt):
            fake_maps.append(face_map)
        elif prompt in REAL_PROMPTS:
            real_maps.append(face_map)

    # Contrastive map: keep only where manipulation exceeds real.
    contrastive = np.clip(average(fake_maps) - average(real_maps), 0, None)
    peak = contrastive.max()
    if peak > 1e-8:
        contrastive = (contrastive / peak).astype(np.float32)
    contrastive = contrastive * face_mask

    # Top heatmap: best-scoring manipulation prompt, else best prompt overall.
    manip_scores = {
        prompt: value
        for prompt, value in scores.items()
        if mentions_manipulation(prompt)
    }
    ranking = manip_scores if manip_scores else scores
    top_prompt_name = max(ranking, key=ranking.get)
    top_heatmap = per_text[top_prompt_name]

    return contrastive, per_text, scores, prompts, top_heatmap


def score_regions_manipulation(img_hires, heatmap, masks, scores):
    """Score each facial region by the 95th percentile of the heatmap inside it.

    Every mask is resized (nearest neighbour) to the heatmap's size and
    multiplied into the heatmap; the score is the 95th percentile of the
    result over pixels where the resized mask is positive, which captures
    anomaly peaks rather than the average.

    Args:
        img_hires: The original image. Not used by this function.
        heatmap: 2-D heatmap array (the CLIP heatmap, per the caller).
        masks: Dict mapping region name to a mask array (the BiSeNet masks,
            per the caller). Boolean masks are accepted.
        scores: Additional scores/configuration. Not used by this function.

    Returns:
        Dict ``{region_name: {"contrast": score}}`` where ``score`` is always
        a plain Python ``float``; it is 0.0 for a region with no positive
        mask pixel.
    """
    reg_scores = {}

    # Masks are brought to the heatmap's resolution, not the other way round.
    height, width = heatmap.shape[:2]

    for name, mask in masks.items():
        # cv2.resize does not accept boolean arrays; 0/1 uint8 is equivalent.
        if getattr(mask, "dtype", None) == np.bool_:
            mask = mask.astype(np.uint8)

        mask_resized = cv2.resize(
            mask, (width, height), interpolation=cv2.INTER_NEAREST
        )
        masked_heatmap = heatmap * mask_resized

        # Guard on exactly the pixels that are selected below, so an empty
        # selection can never reach np.percentile.
        region = mask_resized > 0
        if region.any():
            # float() keeps the return type identical in both branches.
            score = float(np.percentile(masked_heatmap[region], 95))
        else:
            score = 0.0

        reg_scores[name] = {"contrast": score}

    return reg_scores