"""Gradio app that flags deepfake faces with a fine-tuned DeiT classifier.

For every uploaded image the app detects faces with MTCNN, classifies each
face crop as FAKE/REAL and, for faces labelled FAKE, renders a Grad-CAM
heatmap and overlay. All images are returned as base64 JPEG data URIs inside
a JSON-serialisable dict.
"""

import base64
import io

import gradio as gr
import huggingface_hub
import matplotlib

matplotlib.use("Agg")  # select the non-interactive backend before importing pyplot

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from PIL import Image, ImageFilter
from safetensors.torch import load_file
from transformers import DeiTForImageClassification


# ─── HELPER: CONVERT AN IMAGE TO BASE64 ───
def pil_to_b64(img):
    """Encode a PIL image as a ``data:image/jpeg;base64,...`` URI.

    Args:
        img: A PIL image, or ``None``.

    Returns:
        The data-URI string, or ``None`` when ``img`` is ``None``.
    """
    if img is None:
        return None
    # The JPEG writer rejects modes such as RGBA, LA and P, so flatten
    # anything that is not plain RGB/greyscale before saving.
    if img.mode not in ("RGB", "L"):
        img = img.convert("RGB")
    buffered = io.BytesIO()
    img.save(buffered, format="JPEG")
    encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return "data:image/jpeg;base64," + encoded


# ─── CONFIGURATION ───
DEVICE = torch.device("cpu")  # the free Hugging Face Space tier runs on CPU
MODEL_REPO = "ChristianDW15/deepfake-deit"
IMG_SIZE = 224
FACE_MARGIN = 40
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
MIN_FACE_CONFIDENCE = 0.95  # detections below this probability are discarded
THUMB_SIZE = (300, 300)  # size of every image returned to the client


# ─── AI CLASSES (condensed from the original script) ───
class FaceDetector:
    """Thin wrapper around facenet-pytorch's MTCNN face detector."""

    def __init__(self):
        # Imported lazily, as in the original script.
        from facenet_pytorch import MTCNN

        self.mtcnn = MTCNN(
            keep_all=True,
            min_face_size=80,
            thresholds=[0.8, 0.8, 0.8],
            device=DEVICE,
        )

    def detect_and_crop(self, pil_image):
        """Detect faces and return padded RGB crops.

        Args:
            pil_image: The PIL image to search.

        Returns:
            A list of dicts with keys ``face_img`` (RGB crop), ``prob``
            (detection probability as float) and ``idx`` (position of the
            detection in the detector output; indices of discarded
            detections are skipped, so the sequence may have gaps). The list
            is empty when nothing is detected.
        """
        # Normalise to RGB so detection and the returned crops always have
        # three channels.
        rgb_image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")
        boxes, probs = self.mtcnn.detect(rgb_image)
        if boxes is None:
            return []

        img_w, img_h = rgb_image.size
        results = []
        for idx, (box, prob) in enumerate(zip(boxes, probs)):
            if prob is None or prob < MIN_FACE_CONFIDENCE:
                continue
            # Pad the box by FACE_MARGIN pixels, clamped to the image bounds.
            x1 = max(0, int(box[0]) - FACE_MARGIN)
            y1 = max(0, int(box[1]) - FACE_MARGIN)
            x2 = min(img_w, int(box[2]) + FACE_MARGIN)
            y2 = min(img_h, int(box[3]) + FACE_MARGIN)
            if x2 <= x1 or y2 <= y1:
                # A box lying entirely outside the image would give an empty crop.
                continue
            crop = rgb_image.crop((x1, y1, x2, y2))
            results.append({"face_img": crop, "prob": float(prob), "idx": idx})
        return results


class DeiTGradCAM:
    """Grad-CAM over the last encoder layer of a DeiT classification model."""

    def __init__(self, model):
        self.model = model
        self._acts = None  # activations captured by the forward hook
        self._grads = None  # output gradients captured by the backward hook
        self._hooks = []  # handles of the currently registered hooks

    def _register_hooks(self):
        """Attach forward/backward hooks to the last encoder layer."""

        def fwd_hook(m, inp, out):
            # NOTE(review): assumes some transformers releases return a tuple
            # from the encoder layer and others a bare tensor; accept both.
            hidden = out[0] if isinstance(out, (tuple, list)) else out
            self._acts = hidden.detach().clone()

        def bwd_hook(m, gin, gout):
            self._grads = gout[0].detach().clone()

        target = self.model.deit.encoder.layer[-1]
        self._hooks.extend(
            [
                target.register_forward_hook(fwd_hook),
                target.register_full_backward_hook(bwd_hook),
            ]
        )

    def generate(self, tensor):
        """Compute a Grad-CAM heatmap for logit 0 of the first batch item.

        Args:
            tensor: Input batch passed to the model as ``pixel_values``.

        Returns:
            A ``(IMG_SIZE, IMG_SIZE)`` numpy array scaled so that its maximum
            is 1, or all zeros when the map has no positive response.

        Raises:
            RuntimeError: If the hooks did not capture activations/gradients.
        """
        self._acts = None
        self._grads = None
        self._hooks.clear()
        try:
            # Gradients are required even if the caller disabled them.
            with torch.enable_grad():
                self._register_hooks()
                self.model.zero_grad()
                # detach() yields a new tensor object, so requires_grad is not
                # switched on for the caller's tensor.
                inp = tensor.detach().requires_grad_(True)
                out = self.model(pixel_values=inp)
                out.logits[0, 0].backward()
        finally:
            # Always unhook, even when forward/backward raises, so hooks do
            # not accumulate on the shared model.
            for handle in self._hooks:
                handle.remove()
            self._hooks.clear()
            # Reset the parameter gradients produced by backward().
            self.model.zero_grad()

        if self._acts is None or self._grads is None:
            raise RuntimeError("Grad-CAM hooks did not capture activations/gradients")

        acts, grads = self._acts[0], self._grads[0]
        weights = grads.mean(dim=0)  # average the gradients over the token axis
        # Drop the first two tokens before reshaping to a square grid
        # (presumably DeiT's class and distillation tokens — confirm).
        cam = torch.matmul(acts, weights)[2:].detach().numpy()
        cam = np.maximum(cam, 0)
        if cam.max() == 0:
            return np.zeros((IMG_SIZE, IMG_SIZE))

        n = int(np.sqrt(len(cam)))
        cam_2d = cam[: n * n].reshape(n, n)
        cam_t = torch.tensor(cam_2d).unsqueeze(0).unsqueeze(0).float()
        cam_up = F.interpolate(cam_t, size=(IMG_SIZE, IMG_SIZE), mode="bicubic")[0, 0].numpy()
        cam_up = np.maximum(cam_up, 0)  # bicubic interpolation can undershoot below zero

        try:
            from scipy.ndimage import gaussian_filter
        except ImportError:
            # Smoothing is optional; continue without it when scipy is absent.
            pass
        else:
            cam_up = gaussian_filter(cam_up, sigma=8)

        if cam_up.max() > 0:
            cam_up /= cam_up.max()
        return cam_up


# ─── GLOBAL MODEL INITIALISATION ───
detector = FaceDetector()
model = DeiTForImageClassification.from_pretrained(
    "facebook/deit-base-patch16-224",
    num_labels=2,
    ignore_mismatched_sizes=True,
)
weights_path = huggingface_hub.hf_hub_download(repo_id=MODEL_REPO, filename="model.safetensors")
model.load_state_dict(load_file(weights_path))
model.eval()
gradcam = DeiTGradCAM(model)

tf = T.Compose(
    [
        T.Resize((IMG_SIZE, IMG_SIZE)),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]
)


def _render_heatmap(hm, face_rgb):
    """Return ``(heatmap_b64, overlay_b64)`` for a CAM and a THUMB_SIZE RGB face."""
    hm = np.clip(hm, 0.0, 1.0)
    heat_pil = (
        Image.fromarray((hm * 255).astype(np.uint8))
        .resize(THUMB_SIZE)
        .filter(ImageFilter.GaussianBlur(3))
    )
    cmap = plt.get_cmap("hot")
    # The colormap returns RGBA floats in [0, 1]; keep RGB as uint8.
    heat_rgb = (cmap(np.array(heat_pil) / 255.0) * 255).astype(np.uint8)[:, :, :3]
    overlay = np.clip(0.55 * heat_rgb + 0.45 * np.array(face_rgb), 0, 255).astype(np.uint8)
    return pil_to_b64(Image.fromarray(heat_rgb)), pil_to_b64(Image.fromarray(overlay))


def process_api(image):
    """Classify every detected face in ``image`` as FAKE or REAL.

    Args:
        image: PIL image supplied by Gradio, or ``None``.

    Returns:
        ``{"error": "No image"}`` when ``image`` is ``None``; otherwise a dict
        with ``verdict``, ``global_fake_score``, ``global_real_score`` and
        ``faces`` (one entry per analysed face, with scores and base64
        images; heatmap/overlay are ``None`` for faces labelled REAL).
    """
    if image is None:
        return {"error": "No image"}
    # The normalisation transform and the overlay blend both need exactly
    # three channels, including on the full-image fallback path.
    if image.mode != "RGB":
        image = image.convert("RGB")

    faces = detector.detect_and_crop(image)
    if not faces:
        faces = [{"face_img": image, "idx": 0}]  # fall back to the full image

    results = []
    global_fake_prob = 0.0
    for f in faces:
        face_pil = f["face_img"]
        tensor = tf(face_pil).unsqueeze(0)
        with torch.no_grad():
            logits = model(pixel_values=tensor.clone()).logits[0]
        probs = torch.softmax(logits, dim=0)
        # Class index 0 is read as "fake" and index 1 as "real".
        p_fake, p_real = probs[0].item(), probs[1].item()
        global_fake_prob = max(global_fake_prob, p_fake)
        label = "FAKE" if p_fake > p_real else "REAL"

        # Build the visualisations.
        face_pil_resized = face_pil.resize(THUMB_SIZE)
        crop_b64 = pil_to_b64(face_pil_resized)
        heatmap_b64 = None
        overlay_b64 = None
        if label == "FAKE":
            hm = gradcam.generate(tensor.clone())
            heatmap_b64, overlay_b64 = _render_heatmap(hm, face_pil_resized)

        results.append(
            {
                "idx": f["idx"] + 1,
                "label": label,
                "fake_score": p_fake,
                "real_score": p_real,
                "original_b64": crop_b64,
                "heatmap_b64": heatmap_b64,
                "overlay_b64": overlay_b64,
            }
        )

    return {
        "verdict": "DEEPFAKE" if global_fake_prob > 0.5 else "AUTHENTIC",
        "global_fake_score": global_fake_prob,
        "global_real_score": 1 - global_fake_prob,
        "faces": results,
    }


demo = gr.Interface(fn=process_api, inputs=gr.Image(type="pil"), outputs="json")

if __name__ == "__main__":
    demo.launch()