# Source: Hugging Face Space "ChristianDW15/deepfake-api"
# Space status when captured: Sleeping. File size: 6,228 bytes.

import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from PIL import Image, ImageFilter
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from transformers import DeiTForImageClassification
import base64
import io

# ─── HELPER FUNCTION: CONVERT AN IMAGE TO BASE64 ───
# Image modes that are handed to the JPEG encoder unchanged. Every other mode
# (for example RGBA, LA or palette images) is converted to RGB first, because
# saving such an image as JPEG raises an error.
_JPEG_WRITABLE_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")


def pil_to_b64(img):
    """Encode a PIL image as a base64 JPEG data URI.

    Args:
        img: A PIL image, or ``None``.

    Returns:
        A string of the form ``"data:image/jpeg;base64,<payload>"``, or
        ``None`` when *img* is ``None``.
    """
    if img is None:
        return None
    if img.mode not in _JPEG_WRITABLE_MODES:
        # JPEG has no alpha/palette support; normalise instead of crashing.
        img = img.convert("RGB")
    buffered = io.BytesIO()
    img.save(buffered, format="JPEG")
    payload = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return "data:image/jpeg;base64," + payload

# ─── CONFIGURATION ───
DEVICE = torch.device("cpu") # The free Hugging Face Space tier runs on CPU
# Hub repository the fine-tuned weights file is downloaded from.
MODEL_REPO = "ChristianDW15/deepfake-deit"
# Side length, in pixels, of the square classifier input; also the size of
# the Grad-CAM map returned by DeiTGradCAM.generate.
IMG_SIZE = 224
# Pixels of padding added on every side of a detected face box before
# cropping (clamped to the image bounds).
FACE_MARGIN = 40
# Per-channel mean / standard deviation passed to T.Normalize in the
# preprocessing pipeline.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

# ─── AI CLASSES (condensed from your original script) ───
class FaceDetector:
    """Find faces with MTCNN and hand back padded RGB crops of the confident hits."""

    def __init__(self):
        # Imported here rather than at module top, so facenet_pytorch is only
        # required once a detector is actually constructed.
        from facenet_pytorch import MTCNN

        mtcnn_options = {
            "keep_all": True,
            "min_face_size": 80,
            "thresholds": [0.8, 0.8, 0.8],
            "device": DEVICE,
        }
        self.mtcnn = MTCNN(**mtcnn_options)

    def detect_and_crop(self, pil_image):
        """Detect faces in *pil_image* and crop each one with a safety margin.

        Detections whose probability is missing or below 0.95 are dropped.

        Returns:
            A list of dicts with keys ``"face_img"`` (RGB crop), ``"prob"``
            (detection probability as a float) and ``"idx"`` (position of the
            detection in the detector output, counting dropped ones too).
            The list is empty when nothing was detected.
        """
        detected_boxes, confidences = self.mtcnn.detect(pil_image)
        if detected_boxes is None:
            return []

        width, height = pil_image.size
        faces = []
        for position, (bbox, score) in enumerate(zip(detected_boxes, confidences)):
            if score is None or score < 0.95:
                continue

            # Grow the box by FACE_MARGIN on each side, clamped to the image.
            left = max(int(bbox[0]) - FACE_MARGIN, 0)
            top = max(int(bbox[1]) - FACE_MARGIN, 0)
            right = min(int(bbox[2]) + FACE_MARGIN, img_w := width)
            bottom = min(int(bbox[3]) + FACE_MARGIN, img_h := height)

            face_crop = pil_image.crop((left, top, right, bottom))
            faces.append(
                {
                    "face_img": face_crop.convert("RGB"),
                    "prob": float(score),
                    "idx": position,
                }
            )
        return faces

class DeiTGradCAM:
    """Grad-CAM style saliency maps for a DeiT image classifier.

    Activations and gradients are captured at the last encoder layer
    (``model.deit.encoder.layer[-1]``) with temporary hooks, combined into a
    per-token relevance score for logit 0, and upsampled to a square map.

    Args:
        model: Classifier exposing ``deit.encoder.layer`` and returning an
            object with a ``logits`` attribute when called with
            ``pixel_values=...``.
        img_size: Side length of the returned map. Defaults to the
            module-level ``IMG_SIZE``.
    """

    def __init__(self, model, img_size=None):
        self.model = model
        self.img_size = IMG_SIZE if img_size is None else img_size
        self._acts = None
        self._grads = None
        self._hooks = []

    def _register_hooks(self):
        """Attach the forward/backward hooks to the last encoder layer."""

        def fwd_hook(m, inp, out):
            # Depending on the transformers version the layer returns either
            # the hidden-state tensor or a tuple whose first item is that tensor.
            hidden = out[0] if isinstance(out, (tuple, list)) else out
            self._acts = hidden.detach().clone()

        def bwd_hook(m, gin, gout):
            self._grads = gout[0].detach().clone()

        target = self.model.deit.encoder.layer[-1]
        # Track each handle as soon as it exists so a failure halfway through
        # registration can still be cleaned up.
        self._hooks.append(target.register_forward_hook(fwd_hook))
        self._hooks.append(target.register_full_backward_hook(bwd_hook))

    def _remove_hooks(self):
        """Detach every hook this object registered and forget the handles."""
        for handle in self._hooks:
            handle.remove()
        self._hooks.clear()

    def generate(self, tensor):
        """Compute the saliency map for logit 0 of a single image.

        Args:
            tensor: Preprocessed input batch; only the first sample is used.
                The tensor is not modified (a detached copy is differentiated).

        Returns:
            A 2-D NumPy array of shape ``(img_size, img_size)`` with values in
            ``[0, 1]``; all zeros when no token has positive relevance.

        Raises:
            RuntimeError: If the hooks captured no activations or gradients.
        """
        # Drop leftovers from any earlier, interrupted call.
        self._remove_hooks()
        self._acts = None
        self._grads = None
        try:
            self._register_hooks()
            self.model.zero_grad()
            # Gradients are needed even if the caller is inside torch.no_grad().
            with torch.enable_grad():
                inp = tensor.detach().clone().requires_grad_(True)
                out = self.model(pixel_values=inp)
                out.logits[0, 0].backward()
        finally:
            # Always detach the hooks, otherwise they pile up on the shared
            # model whenever the forward or backward pass raises.
            self._remove_hooks()

        if self._acts is None or self._grads is None:
            raise RuntimeError("Grad-CAM hooks did not capture activations/gradients")

        acts, grads = self._acts[0], self._grads[0]
        # Channel weights: gradient averaged over all tokens.
        weights = grads.mean(dim=0)
        # Skip the first two tokens (for DeiT: class and distillation tokens)
        # so only patch tokens remain.
        cam = torch.matmul(acts, weights)[2:].detach().numpy()
        cam = np.maximum(cam, 0)
        if cam.max() == 0:
            return np.zeros((self.img_size, self.img_size))

        # Fold the patch scores back into the square patch grid.
        n = int(np.sqrt(len(cam)))
        cam_2d = cam[:n * n].reshape(n, n)
        cam_t = torch.tensor(cam_2d).unsqueeze(0).unsqueeze(0).float()
        cam_up = F.interpolate(
            cam_t, size=(self.img_size, self.img_size), mode="bicubic"
        )[0, 0].numpy()
        # Bicubic interpolation can overshoot below zero.
        cam_up = np.maximum(cam_up, 0)

        # Smoothing is optional: skip it only when SciPy is not installed.
        try:
            from scipy.ndimage import gaussian_filter
        except ImportError:
            gaussian_filter = None
        if gaussian_filter is not None:
            cam_up = gaussian_filter(cam_up, sigma=8)

        if cam_up.max() > 0:
            cam_up /= cam_up.max()
        return cam_up

# ─── GLOBAL MODEL INITIALIZATION ───
# Everything below runs once at import time and is shared by all requests.

# Face detector instance used by process_api.
detector = FaceDetector()
# Build the DeiT classifier with a 2-label head starting from the public
# checkpoint; ignore_mismatched_sizes=True is passed so a head whose size
# differs from the checkpoint's does not abort loading.
model = DeiTForImageClassification.from_pretrained("facebook/deit-base-patch16-224", num_labels=2, ignore_mismatched_sizes=True)
import huggingface_hub
# Download "model.safetensors" from MODEL_REPO and load it into the model,
# replacing the weights set up by from_pretrained above.
weights_path = huggingface_hub.hf_hub_download(repo_id=MODEL_REPO, filename="model.safetensors")
from safetensors.torch import load_file
model.load_state_dict(load_file(weights_path))
# Put the model in evaluation mode before serving predictions.
model.eval()
# Grad-CAM helper bound to the same model instance.
gradcam = DeiTGradCAM(model)

import torchvision.transforms as T
# Preprocessing applied to every face crop: resize to IMG_SIZE x IMG_SIZE,
# convert to a tensor, then normalise with IMAGENET_MEAN / IMAGENET_STD.
tf = T.Compose([T.Resize((IMG_SIZE, IMG_SIZE)), T.ToTensor(), T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)])

def process_api(image):
    """Classify every face in *image* as REAL or FAKE and build visual evidence.

    Faces are located with the module-level ``detector``; when none is found
    the whole image is classified instead. Class index 0 of the model output
    is treated as "fake" and index 1 as "real". For faces labelled FAKE a
    Grad-CAM heatmap and a heatmap/face overlay are rendered.

    Args:
        image: PIL image, or ``None``.

    Returns:
        ``{"error": "No image"}`` when *image* is ``None``. Otherwise a dict
        with ``"verdict"`` ("DEEPFAKE" or "AUTHENTIC"), ``"global_fake_score"``
        (highest fake probability over all faces), ``"global_real_score"`` and
        ``"faces"``: one entry per face with its 1-based ``"idx"``, ``"label"``,
        scores and base64 JPEG data URIs (heatmap/overlay are ``None`` for
        REAL faces).
    """
    if image is None:
        return {"error": "No image"}

    # The normalisation in `tf` and the overlay blend below both assume three
    # RGB channels. Face crops are already converted to RGB by the detector;
    # converting the input up front gives the full-image fallback the same
    # guarantee (e.g. for RGBA or grayscale uploads).
    if image.mode != "RGB":
        image = image.convert("RGB")

    faces = detector.detect_and_crop(image)
    if not faces:
        # No confident detection: fall back to classifying the full image.
        faces = [{"face_img": image, "idx": 0}]

    # Loop-invariant colormap lookup, hoisted out of the per-face loop.
    cmap = plt.get_cmap("hot")

    results = []
    global_fake_prob = 0

    for f in faces:
        face_pil = f["face_img"]
        tensor = tf(face_pil).unsqueeze(0)

        with torch.no_grad():
            logits = model(pixel_values=tensor.clone()).logits[0]
            probs = torch.softmax(logits, dim=0)

        p_fake, p_real = probs[0].item(), probs[1].item()
        # The image-level score is driven by the most suspicious face.
        global_fake_prob = max(global_fake_prob, p_fake)
        label = "FAKE" if p_fake > p_real else "REAL"

        # Build the visualisations.
        face_pil_resized = face_pil.resize((300, 300))
        crop_b64 = pil_to_b64(face_pil_resized)

        heatmap_b64 = None
        overlay_b64 = None

        if label == "FAKE":
            # Pass a copy: the Grad-CAM pass differentiates its input.
            hm = gradcam.generate(tensor.clone())
            hm = np.clip(hm, 0.0, 1.0)
            heat_pil = (
                Image.fromarray((hm * 255).astype(np.uint8))
                .resize((300, 300))
                .filter(ImageFilter.GaussianBlur(3))
            )
            # Colorize and drop the alpha channel returned by the colormap.
            heat_rgb = (cmap(np.array(heat_pil) / 255.0) * 255).astype(np.uint8)[:, :, :3]

            heatmap_b64 = pil_to_b64(Image.fromarray(heat_rgb))

            # Blend 55% heatmap with 45% face.
            overlay = np.clip(
                0.55 * heat_rgb + 0.45 * np.array(face_pil_resized), 0, 255
            ).astype(np.uint8)
            overlay_b64 = pil_to_b64(Image.fromarray(overlay))

        results.append({
            "idx": f["idx"] + 1,
            "label": label,
            "fake_score": p_fake,
            "real_score": p_real,
            "original_b64": crop_b64,
            "heatmap_b64": heatmap_b64,
            "overlay_b64": overlay_b64
        })

    return {
        "verdict": "DEEPFAKE" if global_fake_prob > 0.5 else "AUTHENTIC",
        "global_fake_score": global_fake_prob,
        "global_real_score": 1 - global_fake_prob,
        "faces": results
    }

# Gradio app: takes one PIL image, runs process_api on it and shows the
# returned dict as JSON.
demo = gr.Interface(
    fn=process_api,
    inputs=gr.Image(type="pil"),
    outputs="json",
)

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()