File size: 6,228 Bytes
7ac6de9
 
02b9a9c
 
 
 
 
 
 
 
 
 
7ac6de9
02b9a9c
 
 
 
 
 
 
 
 
7ac6de9
02b9a9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ac6de9
02b9a9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ac6de9
02b9a9c
 
7ac6de9
02b9a9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from PIL import Image, ImageFilter
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from transformers import DeiTForImageClassification
import base64
import io

# ─── HELPER FUNCTION: CONVERT AN IMAGE TO BASE64 ───
def pil_to_b64(img):
    """Encode a PIL image as a base64 JPEG data URI.

    Args:
        img: A PIL image, or None.

    Returns:
        A ``data:image/jpeg;base64,...`` string, or None when *img* is None.
    """
    if img is None:
        return None
    # Pillow's JPEG writer only accepts these modes; anything else (RGBA, LA,
    # P, I, F, ...) raises "cannot write mode ... as JPEG", so convert those
    # to RGB first. Images that already encoded fine are left untouched.
    if img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        img = img.convert("RGB")
    buffered = io.BytesIO()
    img.save(buffered, format="JPEG")
    encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return "data:image/jpeg;base64," + encoded

# ─── CONFIGURATION ───
DEVICE = torch.device("cpu") # The free Hugging Face Space tier runs on CPU
# Hub repository id from which the fine-tuned weights file is downloaded.
MODEL_REPO = "ChristianDW15/deepfake-deit"
# Side length (pixels) that inputs are resized to before classification.
IMG_SIZE = 224
# Padding (pixels) added on every side of a detected face box before cropping.
FACE_MARGIN = 40
# Per-channel mean / std passed to the Normalize preprocessing step.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

# ─── AI CLASSES (condensed from your original script) ───
class FaceDetector:
    """Find faces with MTCNN and hand back padded RGB crops."""

    def __init__(self):
        # Function-scope import: facenet_pytorch is only pulled in when a
        # detector is actually constructed.
        from facenet_pytorch import MTCNN

        self.mtcnn = MTCNN(
            keep_all=True,
            min_face_size=80,
            thresholds=[0.8, 0.8, 0.8],
            device=DEVICE,
        )

    def detect_and_crop(self, pil_image):
        """Detect faces in *pil_image* and crop each confident one.

        Boxes whose probability is missing or below 0.95 are dropped. Every
        kept box is grown by FACE_MARGIN pixels per side and clamped to the
        image bounds before cropping.

        Returns:
            A list of dicts with keys ``face_img`` (RGB crop), ``prob``
            (detection probability as float) and ``idx`` (position of the box
            in the detector output, so numbering can have gaps). Empty when
            the detector reports no boxes.
        """
        boxes, probs = self.mtcnn.detect(pil_image)
        if boxes is None:
            return []

        width, height = pil_image.size
        crops = []
        for position, (bbox, score) in enumerate(zip(boxes, probs)):
            if score is None or score < 0.95:
                continue
            left = max(0, int(bbox[0]) - FACE_MARGIN)
            top = max(0, int(bbox[1]) - FACE_MARGIN)
            right = min(width, int(bbox[2]) + FACE_MARGIN)
            bottom = min(height, int(bbox[3]) + FACE_MARGIN)
            face = pil_image.crop((left, top, right, bottom)).convert("RGB")
            crops.append({"face_img": face, "prob": float(score), "idx": position})
        return crops

class DeiTGradCAM:
    """Grad-CAM style saliency maps for a DeiT image classifier.

    Hooks the last encoder layer of ``model.deit`` to capture its output
    activations and the gradient of logit ``[0, 0]`` with respect to them,
    then combines both into a 2-D heatmap.
    """

    def __init__(self, model):
        self.model = model
        self._acts = None
        self._grads = None
        self._hooks = []

    def _register_hooks(self):
        """Attach the forward and backward hooks to the last encoder layer."""

        def fwd_hook(module, inputs, output):
            # Depending on the transformers release, an encoder layer returns
            # either the hidden-state tensor or a tuple starting with it.
            hidden = output[0] if isinstance(output, (tuple, list)) else output
            self._acts = hidden.detach().clone()

        def bwd_hook(module, grad_input, grad_output):
            self._grads = grad_output[0].detach().clone()

        target = self.model.deit.encoder.layer[-1]
        self._hooks.extend([
            target.register_forward_hook(fwd_hook),
            target.register_full_backward_hook(bwd_hook),
        ])

    def _remove_hooks(self):
        """Detach every hook registered by this instance."""
        for handle in self._hooks:
            handle.remove()
        self._hooks.clear()

    def generate(self, tensor):
        """Compute a normalised heatmap for the first item of *tensor*.

        Args:
            tensor: Input batch forwarded to the model as ``pixel_values``.
                It is not modified; gradients are tracked on a detached view.

        Returns:
            A 2-D numpy array with values in [0, 1], sized like the last two
            dimensions of *tensor*. All zeros when no positive evidence is
            found.

        Raises:
            RuntimeError: If the hooks captured no activations or gradients.
        """
        # Remove (not just forget) hooks left over from an earlier call and
        # drop stale captures so they can never be reused silently.
        self._remove_hooks()
        self._acts = None
        self._grads = None
        height, width = (int(dim) for dim in tensor.shape[-2:])

        self._register_hooks()
        try:
            self.model.zero_grad()
            # detach() yields a new tensor object, so the caller's tensor does
            # not get its requires_grad flag flipped.
            inp = tensor.detach().requires_grad_(True)
            out = self.model(pixel_values=inp)
            out.logits[0, 0].backward()
        finally:
            # Always detach the hooks, even when forward/backward raises;
            # otherwise they pile up on the shared model.
            self._remove_hooks()
        # Parameter gradients accumulated by backward() are not needed.
        self.model.zero_grad()

        if self._acts is None or self._grads is None:
            raise RuntimeError("Grad-CAM hooks captured no activations or gradients")

        acts, grads = self._acts[0], self._grads[0]
        weights = grads.mean(dim=0)
        # Skip the first two tokens (DeiT prepends a class token and a
        # distillation token) so only patch tokens remain.
        cam = torch.matmul(acts, weights)[2:].detach().cpu().numpy()
        cam = np.maximum(cam, 0)
        if cam.size == 0 or cam.max() == 0:
            return np.zeros((height, width))

        # Fold the patch scores into the largest square grid that fits.
        n = int(np.sqrt(len(cam)))
        cam_2d = cam[:n * n].reshape(n, n)
        cam_t = torch.tensor(cam_2d).unsqueeze(0).unsqueeze(0).float()
        cam_up = F.interpolate(cam_t, size=(height, width), mode="bicubic")[0, 0].numpy()
        # Bicubic interpolation can undershoot below zero.
        cam_up = np.maximum(cam_up, 0)

        # Smoothing is optional: skip it only when scipy is not installed.
        try:
            from scipy.ndimage import gaussian_filter
        except ImportError:
            gaussian_filter = None
        if gaussian_filter is not None:
            cam_up = gaussian_filter(cam_up, sigma=8)

        peak = cam_up.max()
        if peak > 0:
            cam_up = cam_up / peak
        return cam_up

# ─── GLOBAL MODEL INITIALISATION ───
# Everything in this section runs once, at import time.
detector = FaceDetector()
# Build a DeiT classifier with a 2-label head from the public base checkpoint.
# ignore_mismatched_sizes=True avoids an error when checkpoint weights differ
# in size from the model being built (presumably the classifier head here).
model = DeiTForImageClassification.from_pretrained("facebook/deit-base-patch16-224", num_labels=2, ignore_mismatched_sizes=True)
import huggingface_hub
# Fetch model.safetensors from MODEL_REPO and load it into the model.
weights_path = huggingface_hub.hf_hub_download(repo_id=MODEL_REPO, filename="model.safetensors")
from safetensors.torch import load_file
model.load_state_dict(load_file(weights_path))
# Switch to evaluation mode for inference.
model.eval()
gradcam = DeiTGradCAM(model)

import torchvision.transforms as T
# Preprocessing: resize to IMG_SIZE x IMG_SIZE, convert to a tensor, then
# normalise with IMAGENET_MEAN / IMAGENET_STD.
tf = T.Compose([T.Resize((IMG_SIZE, IMG_SIZE)), T.ToTensor(), T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)])

def process_api(image):
    """Classify every face found in *image* as REAL or FAKE.

    Args:
        image: A PIL image, or None.

    Returns:
        ``{"error": "No image"}`` when *image* is None. Otherwise a dict with:
        ``verdict`` ("DEEPFAKE" when the highest per-face fake probability
        exceeds 0.5, else "AUTHENTIC"), ``global_fake_score``,
        ``global_real_score`` and ``faces``. Each entry of ``faces`` holds the
        1-based ``idx``, ``label``, ``fake_score``, ``real_score`` and base64
        JPEG data URIs ``original_b64``, ``heatmap_b64`` and ``overlay_b64``
        (the last two are None for faces labelled REAL).
    """
    if image is None:
        return {"error": "No image"}

    faces = detector.detect_and_crop(image)
    if not faces:
        # No confident face: analyse the whole frame instead. Convert to RGB,
        # as the detector does for its crops, so the 3-channel normalisation
        # and the overlay blend also work for grayscale/RGBA/palette inputs.
        faces = [{"face_img": image.convert("RGB"), "idx": 0}]

    # Loop-invariant helpers, built once instead of once per face.
    cmap = plt.get_cmap("hot")
    blur = ImageFilter.GaussianBlur(3)
    preview_size = (300, 300)

    results = []
    global_fake_prob = 0

    for face in faces:
        face_pil = face["face_img"]
        tensor = tf(face_pil).unsqueeze(0)

        with torch.no_grad():
            logits = model(pixel_values=tensor).logits[0]
            probs = torch.softmax(logits, dim=0)

        # Class index 0 is treated as FAKE, index 1 as REAL.
        p_fake, p_real = probs[0].item(), probs[1].item()
        global_fake_prob = max(global_fake_prob, p_fake)
        label = "FAKE" if p_fake > p_real else "REAL"

        # Build the visualisations.
        face_pil_resized = face_pil.resize(preview_size)
        crop_b64 = pil_to_b64(face_pil_resized)

        heatmap_b64 = None
        overlay_b64 = None

        # The heatmap needs a backward pass, so compute it only for fakes.
        if label == "FAKE":
            hm = np.clip(gradcam.generate(tensor.clone()), 0.0, 1.0)
            heat_pil = Image.fromarray((hm * 255).astype(np.uint8)).resize(preview_size).filter(blur)
            # Colourise the grayscale heatmap and drop the alpha channel.
            heat_rgb = (cmap(np.array(heat_pil) / 255.0) * 255).astype(np.uint8)[:, :, :3]
            heatmap_b64 = pil_to_b64(Image.fromarray(heat_rgb))

            blended = 0.55 * heat_rgb + 0.45 * np.array(face_pil_resized)
            overlay = np.clip(blended, 0, 255).astype(np.uint8)
            overlay_b64 = pil_to_b64(Image.fromarray(overlay))

        results.append({
            "idx": face["idx"] + 1,
            "label": label,
            "fake_score": p_fake,
            "real_score": p_real,
            "original_b64": crop_b64,
            "heatmap_b64": heatmap_b64,
            "overlay_b64": overlay_b64,
        })

    return {
        "verdict": "DEEPFAKE" if global_fake_prob > 0.5 else "AUTHENTIC",
        "global_fake_score": global_fake_prob,
        "global_real_score": 1 - global_fake_prob,
        "faces": results,
    }

# Gradio interface: a single image input (delivered to process_api as a PIL
# image) and a JSON output.
demo = gr.Interface(
    fn=process_api,
    inputs=gr.Image(type="pil"),
    outputs="json",
)

if __name__ == "__main__":
    demo.launch()