vaniv committed on
Commit
b0d371d
·
verified ·
1 Parent(s): 8f07fef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -134
app.py CHANGED
@@ -1,13 +1,71 @@
1
- import io
2
- import numpy as np
3
- import gradio as gr
4
  from PIL import Image, ImageChops, ImageDraw
5
  import cv2
6
  from skimage import exposure
7
  import mediapipe as mp
8
 
9
- # ====================== ELA (compression residual) ======================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def _enhance_for_display(pil_img, scale: float):
12
  arr = np.array(pil_img).astype("float32") * scale
13
  arr = np.clip(arr, 0, 255).astype("uint8")
@@ -16,12 +74,10 @@ def _enhance_for_display(pil_img, scale: float):
16
  def error_level_analysis(pil_img: Image.Image, quality: int = 90):
17
  img = pil_img.convert("RGB")
18
  with io.BytesIO() as buf:
19
- img.save(buf, "JPEG", quality=quality)
20
- buf.seek(0)
21
  comp = Image.open(buf).convert("RGB")
22
  diff = ImageChops.difference(img, comp)
23
- extrema = diff.getextrema()
24
- max_diff = max([m for (_, m) in extrema])
25
  scale = 255.0 / max(1, max_diff)
26
  ela_vis = _enhance_for_display(diff, scale)
27
  ela_np = np.array(ela_vis, dtype=np.float32)
@@ -31,117 +87,51 @@ def error_level_analysis(pil_img: Image.Image, quality: int = 90):
31
  def ela_sweep_mean(pil_img, qualities=(95, 90, 85)):
32
  vals = []
33
  for q in qualities:
34
- _, m = error_level_analysis(pil_img, quality=q)
35
- vals.append(m)
36
  return float(max(vals)), float(np.mean(vals))
37
 
38
- # ====================== Frequency & Noise (support face masks) ======================
39
-
40
def fft_high_freq_ratio(pil_img: Image.Image, mask=None):
    """Share of log-spectrum energy outside a small low-frequency disc.

    `mask` (optional float array matching the Y-channel shape) restricts the
    analysis to a region, e.g. a face oval. Returns (None, ratio); the None
    slot keeps the (debug, score) tuple shape used by the other detectors.
    """
    luma = pil_img.convert("YCbCr").split()[0]
    gray = np.array(luma, dtype=np.float32) / 255.0
    if mask is not None:
        gray = gray * mask

    h, w = gray.shape
    # Hann window in both dimensions suppresses FFT edge artifacts.
    window = np.hanning(h)[:, None] * np.hanning(w)[None, :]
    mag = np.log1p(np.abs(np.fft.fftshift(np.fft.fft2(gray * window))))

    yy, xx = np.ogrid[:h, :w]
    dist = np.sqrt((yy - h // 2) ** 2 + (xx - w // 2) ** 2)
    inner = dist <= min(h, w) * 0.08  # low-frequency disc radius
    low_energy = float(mag[inner].sum())
    high_energy = float(mag[~inner].sum())
    return None, float(high_energy / (high_energy + low_energy + 1e-9))
60
-
61
def noise_inconsistency(pil_img: Image.Image, mask=None):
    """Score in [0, 1] of how unevenly Laplacian noise varies across 32px tiles.

    Spliced regions often carry different sensor-noise statistics; a high
    spread of per-tile Laplacian variance is suspicious. `mask` (optional
    float array, same shape as the Y channel) restricts the measurement.
    Returns (None, score) to match the other detectors' tuple shape.
    """
    y = pil_img.convert("YCbCr").split()[0]
    img = np.array(y, dtype=np.float32)
    if mask is not None:
        img = img * mask

    lap_abs = np.abs(cv2.Laplacian(img, cv2.CV_32F, ksize=3))

    # FIX: removed a dead `_ = exposure.equalize_adapthist(...)` call whose
    # result was discarded — it cost a full CLAHE pass with no effect on the
    # returned score.

    tile = 32
    H, W = lap_abs.shape
    vals = []
    for yy in range(0, H, tile):
        for xx in range(0, W, tile):
            # Slicing auto-clamps at the array edge, so no min() is needed.
            patch = lap_abs[yy:yy + tile, xx:xx + tile]
            if patch.size:
                vals.append(patch.var())
    if not vals:
        return None, 0.0
    vals = np.array(vals, dtype=np.float32)
    # Coefficient of variation of tile variances, squashed to [0, 1).
    score = float(vals.std() / (vals.mean() + 1e-9))
    return None, float(np.tanh(score / 5.0))
87
 
88
- # ====================== Face crop + oval mask ======================
89
-
90
- _mp_face = mp.solutions.face_detection.FaceDetection(
91
- model_selection=0, min_detection_confidence=0.4
92
- )
93
-
94
- def crop_face(pil_img, pad=0.25):
95
- img = np.array(pil_img.convert("RGB"))
96
- h, w = img.shape[:2]
97
- res = _mp_face.process(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
98
- if not res.detections:
99
- return pil_img
100
- det = max(res.detections, key=lambda d: d.location_data.relative_bounding_box.width)
101
- b = det.location_data.relative_bounding_box
102
- x, y, bw, bh = b.xmin, b.ymin, b.width, b.height
103
- x1 = int(max(0, (x - pad*bw) * w)); y1 = int(max(0, (y - pad*bh) * h))
104
- x2 = int(min(w, (x + bw + pad*bw) * w)); y2 = int(min(h, (y + bh + pad*bh) * h))
105
- face = Image.fromarray(img[y1:y2, x1:x2])
106
- return face if face.size[0] > 20 and face.size[1] > 20 else pil_img
107
-
108
def face_oval_mask(img_pil, shrink=0.80):
    """Build a soft face-region selector: a filled ellipse spanning `shrink`
    of each image dimension, centered in the frame.

    Returns a float32 array with the image's (h, w) shape, 1.0 inside the
    ellipse and 0.0 outside.
    """
    w, h = img_pil.size
    canvas = Image.new("L", (w, h), 0)
    inset_x = int((1 - shrink) * w / 2)
    inset_y = int((1 - shrink) * h / 2)
    ImageDraw.Draw(canvas).ellipse((inset_x, inset_y, w - inset_x, h - inset_y), fill=255)
    return np.array(canvas, dtype=np.float32) / 255.0
115
-
116
- # ====================== Natural texture correction ======================
117
-
118
def natural_texture_correction(pil_img: Image.Image):
    """Correction factor in [0.7, 1.0] that discounts very smooth images.

    A low edge-strength-to-contrast ratio suggests naturally smooth
    (realistic) content, so the combined suspicion score is scaled down
    by up to 30%.
    """
    gray = np.array(pil_img.convert("L"), dtype=np.float32) / 255.0
    gx = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
    gy = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
    edge_strength = np.mean(np.sqrt(gx ** 2 + gy ** 2))
    contrast = np.std(gray)
    ratio = edge_strength / (contrast + 1e-6)  # small -> smooth/realistic
    corr = 1.0 - np.clip((0.15 - ratio) * 2.5, 0, 0.3)
    return float(np.clip(corr, 0.7, 1.0))
127
-
128
- # ====================== Decision layer ======================
129
-
130
def combine_scores(ela_mean, hf_ratio, noise_incons_score, texture_corr=1.0):
    """Fuse the heuristic signals into a (label, suspicion) pair.

    Each signal is squashed to [0, 1], combined with fixed weights
    (ELA 0.30, FFT 0.40, noise 0.30), scaled by `texture_corr`, and
    thresholded at 0.65.

    Returns:
        (label, suspect): label is "Likely Manipulated" or
        "Likely Authentic"; suspect is the weighted score in [0, 1].
    """
    w1, w2, w3 = 0.30, 0.40, 0.30
    s_ela = np.clip(ela_mean * 3.0, 0, 1)
    s_hf = np.clip((hf_ratio - 0.65) / 0.25, 0, 1)
    # FIX: removed the dead `np.clip(noise_inconsistency, 0, 1) if False
    # else ...` branch — it referenced the *function object* and could
    # never execute.
    s_noi = np.clip(noise_incons_score, 0, 1)
    suspect = float((w1 * s_ela + w2 * s_hf + w3 * s_noi) * texture_corr)
    label = "Likely Manipulated" if suspect >= 0.65 else "Likely Authentic"
    return label, suspect
138
-
139
- # ====================== Gradio handler ======================
140
 
141
- def _result_card(label: str, conf: float) -> str:
 
142
  pct = max(0.0, min(1.0, conf)) * 100.0
143
  color = "#d84a4a" if label.startswith("Likely Manipulated") else "#2e7d32"
144
  bar_bg = "#e9ecef"
 
145
  return f"""
146
  <div style="max-width:860px;margin:0 auto;">
147
  <div style="border:1px solid #e5e7eb;border-radius:14px;padding:18px 20px;background:#fff;
@@ -154,63 +144,51 @@ def _result_card(label: str, conf: float) -> str:
154
  <div style="height:100%;width:{pct:.4f}%;background:{color};"></div>
155
  </div>
156
  </div>
 
157
  </div>
158
  """
159
 
160
def analyze_simple(pil_img: Image.Image):
    """Full heuristic pipeline: face-crop, oval mask, ELA sweep, FFT, noise, fuse.

    Returns the HTML result card; a blank input yields an authentic card at 0.
    """
    if pil_img is None:
        return _result_card("Likely Authentic", 0.0)

    face = crop_face(pil_img).convert("RGB").resize((512, 512))
    oval = face_oval_mask(face, shrink=0.80)

    ela_peak, ela_avg = ela_sweep_mean(face)
    # Discount the peak ELA reading when the sweep average is very low.
    ela_mean = ela_peak * (0.85 if ela_avg < 0.06 else 1.0)

    _, hf_ratio = fft_high_freq_ratio(face, mask=oval)
    _, noi_score = noise_inconsistency(face, mask=oval)

    texture_corr = natural_texture_correction(face)
    label, conf = combine_scores(ela_mean, hf_ratio, noi_score, texture_corr)
    return _result_card(label, conf)
179
-
180
- # ====================== UI ======================
181
-
182
  CUSTOM_CSS = """
183
  .gradio-container {max-width: 980px !important;}
184
- /* Card-like uploader */
185
  .sleek-card {
186
  border: 1px solid #e5e7eb; border-radius: 16px; background: #fff;
187
  box-shadow: 0 2px 10px rgba(16,24,40,.04); padding: 18px;
188
  }
189
  """
190
-
191
- with gr.Blocks(title="Deepfake Detector", css=CUSTOM_CSS, theme=gr.themes.Soft()) as demo:
192
- gr.Markdown(
193
- "<h2 style='text-align:center;margin-bottom:6px;'>Deepfake Detector</h2>"
194
- "<p style='text-align:center;color:#6b7280;'>Upload an image and get a single, clean likelihood estimate.</p>"
195
- )
196
-
197
  with gr.Row():
198
  with gr.Column(scale=6, elem_classes=["sleek-card"]):
199
- inp = gr.Image(
200
- type="pil",
201
- label="Upload / Paste Image",
202
- sources=["upload", "webcam", "clipboard"], # <-- fixed; 'url' not supported in your build
203
- height=420,
204
- show_label=True,
205
- interactive=True,
206
- )
207
  btn = gr.Button("Analyze", variant="primary", size="lg")
208
-
209
  with gr.Column(scale=6):
210
  out = gr.HTML()
211
-
212
- btn.click(analyze_simple, inputs=inp, outputs=out)
213
- inp.change(analyze_simple, inputs=inp, outputs=out)
214
 
215
  if __name__ == "__main__":
216
  demo.launch()
 
1
+ import io, os, numpy as np, gradio as gr
 
 
2
  from PIL import Image, ImageChops, ImageDraw
3
  import cv2
4
  from skimage import exposure
5
  import mediapipe as mp
6
 
7
+ # ====== HF model choice (pick one) ======
8
+ HF_MODEL_ID = os.getenv("HF_MODEL_ID", "prithivMLmods/Deep-Fake-Detector-v2-Model") # ViT 224
9
+ HF_IMAGE_SIZE = int(os.getenv("HF_IMAGE_SIZE", "224")) # 224 for v2 ViT, 512 for v1 SigLIP
10
+
11
+ # ====== HF imports (lazy so app can start even if transformers missing) ======
12
+ _hf_loaded = False
13
+ _hf_processor = None
14
+ _hf_model = None
15
+ def _try_load_hf():
16
+ global _hf_loaded, _hf_processor, _hf_model
17
+ if _hf_loaded:
18
+ return True
19
+ try:
20
+ from transformers import AutoImageProcessor, AutoModelForImageClassification
21
+ _hf_processor = AutoImageProcessor.from_pretrained(HF_MODEL_ID)
22
+ _hf_model = AutoModelForImageClassification.from_pretrained(HF_MODEL_ID)
23
+ _hf_model.eval()
24
+ _hf_loaded = True
25
+ return True
26
+ except Exception as e:
27
+ print("HF load failed:", e)
28
+ _hf_loaded = False
29
+ return False
30
+
31
def _hf_predict_proba(pil_rgb_face):
    """Return the probability, in [0, 1], that `pil_rgb_face` is a deepfake.

    Assumes `_try_load_hf()` has succeeded so `_hf_processor` / `_hf_model`
    are initialized. The face is resized to the model's input size,
    classified, and the softmax probability of the "fake" class returned.
    """
    import torch  # local import: module stays importable without torch

    with torch.no_grad():
        inputs = _hf_processor(
            images=pil_rgb_face.resize((HF_IMAGE_SIZE, HF_IMAGE_SIZE)),
            return_tensors="pt",
        )
        logits = _hf_model(**inputs).logits[0]
        probs = torch.softmax(logits, dim=-1).cpu().numpy()

    # Map label name -> class index; models commonly use
    # ["Deepfake", "Realism"] or ["fake", "real"].
    id2label = _hf_model.config.id2label
    # BUG FIX: the previous code built `{v.lower(): k for k, v in
    # label2id.items()}` — label2id maps name -> id, so `.lower()` was
    # called on the *integer* id and raised AttributeError. Build the
    # lowercase name -> index map from id2label instead.
    lab2idx = {name.lower(): int(idx) for idx, name in id2label.items()}
    deep_idx = lab2idx.get("deepfake")
    if deep_idx is None:
        deep_idx = lab2idx.get("fake")
    if deep_idx is None:
        # Heuristic: first class whose label contains 'fake'; else index 0.
        deep_idx = next((i for i, name in id2label.items() if "fake" in name.lower()), 0)
    return float(probs[int(deep_idx)])
50
+
51
# ====== Face detect / crop (your pipeline) ======
_mp_face = mp.solutions.face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.4)

def crop_face(pil_img, pad=0.25):
    """Crop the widest detected face, padded by `pad` of the box size per side.

    Falls back to the original image when no face is found or when the crop
    is degenerate (20 px or less on either side).
    """
    rgb = np.array(pil_img.convert("RGB"))
    height, width = rgb.shape[:2]
    detections = _mp_face.process(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)).detections
    if not detections:
        return pil_img
    # Rank detections by relative bounding-box width; widest face wins.
    best = max(detections, key=lambda d: d.location_data.relative_bounding_box.width)
    box = best.location_data.relative_bounding_box
    left = int(max(0, (box.xmin - pad * box.width) * width))
    top = int(max(0, (box.ymin - pad * box.height) * height))
    right = int(min(width, (box.xmin + box.width + pad * box.width) * width))
    bottom = int(min(height, (box.ymin + box.height + pad * box.height) * height))
    face = Image.fromarray(rgb[top:bottom, left:right])
    return face if face.size[0] > 20 and face.size[1] > 20 else pil_img
67
+
68
+ # ====== Heuristic fallback (unchanged core) ======
69
  def _enhance_for_display(pil_img, scale: float):
70
  arr = np.array(pil_img).astype("float32") * scale
71
  arr = np.clip(arr, 0, 255).astype("uint8")
 
74
  def error_level_analysis(pil_img: Image.Image, quality: int = 90):
75
  img = pil_img.convert("RGB")
76
  with io.BytesIO() as buf:
77
+ img.save(buf, "JPEG", quality=quality); buf.seek(0)
 
78
  comp = Image.open(buf).convert("RGB")
79
  diff = ImageChops.difference(img, comp)
80
+ extrema = diff.getextrema(); max_diff = max([m for (_, m) in extrema])
 
81
  scale = 255.0 / max(1, max_diff)
82
  ela_vis = _enhance_for_display(diff, scale)
83
  ela_np = np.array(ela_vis, dtype=np.float32)
 
87
def ela_sweep_mean(pil_img, qualities=(95, 90, 85)):
    """Run ELA at several JPEG qualities.

    Returns (peak mean residual, average mean residual) across the sweep.
    """
    means = [error_level_analysis(pil_img, quality=q)[1] for q in qualities]
    return float(max(means)), float(np.mean(means))
92
 
93
def fft_high_freq_ratio(pil_img: Image.Image):
    """Share of log-spectrum energy outside a small low-frequency disc.

    Returns (None, ratio); the None slot keeps the (debug, score) tuple
    shape shared by the other detectors.
    """
    luma = pil_img.convert("YCbCr").split()[0]
    gray = np.array(luma, dtype=np.float32) / 255.0
    h, w = gray.shape
    # Hann window in both dimensions suppresses FFT edge artifacts.
    window = np.hanning(h)[:, None] * np.hanning(w)[None, :]
    mag = np.log1p(np.abs(np.fft.fftshift(np.fft.fft2(gray * window))))
    yy, xx = np.ogrid[:h, :w]
    dist = np.sqrt((yy - h // 2) ** 2 + (xx - w // 2) ** 2)
    inner = dist <= min(h, w) * 0.08  # low-frequency disc radius
    low = float(mag[inner].sum())
    high = float(mag[~inner].sum())
    return None, float(high / (high + low + 1e-9))
105
 
106
def noise_inconsistency(pil_img: Image.Image):
    """Score in [0, 1] of how unevenly Laplacian noise varies across 32px tiles.

    Returns (None, score); the None slot mirrors the other detectors' shape.
    """
    luma = pil_img.convert("YCbCr").split()[0]
    plane = np.array(luma, dtype=np.float32)
    residual = np.abs(cv2.Laplacian(plane, cv2.CV_32F, ksize=3))
    step = 32
    height, width = residual.shape
    variances = []
    for row in range(0, height, step):
        for col in range(0, width, step):
            # Slicing auto-clamps at the array edge; no min() needed.
            patch = residual[row:row + step, col:col + step]
            if patch.size:
                variances.append(patch.var())
    if not variances:
        return None, 0.0
    variances = np.array(variances, dtype=np.float32)
    # Coefficient of variation of tile variances, squashed to [0, 1).
    spread = float(variances.std() / (variances.mean() + 1e-9))
    return None, float(np.tanh(spread / 5.0))
119
 
120
def combine_scores(ela_mean, hf_ratio, noise_incons_score):
    """Fuse the three heuristic signals into a (label, confidence) pair.

    Each signal is squashed to [0, 1], combined with fixed weights
    (ELA 0.30, FFT 0.40, noise 0.30), and thresholded at 0.65.
    """
    weights = (0.30, 0.40, 0.30)
    signals = (
        np.clip(ela_mean * 3.0, 0, 1),             # ELA residual strength
        np.clip((hf_ratio - 0.65) / 0.25, 0, 1),   # excess high-frequency energy
        np.clip(noise_incons_score, 0, 1),          # tile-noise inconsistency
    )
    conf = float(sum(w * s for w, s in zip(weights, signals)))
    if conf >= 0.65:
        return "Likely Manipulated", conf
    return "Likely Authentic", conf
 
 
128
 
129
+ # ====== Result card ======
130
+ def _result_card(label: str, conf: float, note: str | None = None) -> str:
131
  pct = max(0.0, min(1.0, conf)) * 100.0
132
  color = "#d84a4a" if label.startswith("Likely Manipulated") else "#2e7d32"
133
  bar_bg = "#e9ecef"
134
+ extra = f"<div style='color:#6b7280;font-size:12px;margin-top:10px;text-align:center;'>{note}</div>" if note else ""
135
  return f"""
136
  <div style="max-width:860px;margin:0 auto;">
137
  <div style="border:1px solid #e5e7eb;border-radius:14px;padding:18px 20px;background:#fff;
 
144
  <div style="height:100%;width:{pct:.4f}%;background:{color};"></div>
145
  </div>
146
  </div>
147
+ {extra}
148
  </div>
149
  """
150
 
151
+ # ====== Inference ======
152
+ def analyze(pil_img: Image.Image):
153
  if pil_img is None:
154
  return _result_card("Likely Authentic", 0.0)
155
+ face = crop_face(pil_img).convert("RGB")
156
+
157
+ if _try_load_hf():
158
+ prob_fake = _hf_predict_proba(face)
159
+ label = "Likely Manipulated" if prob_fake >= 0.5 else "Likely Authentic"
160
+ note = f"HF model: {HF_MODEL_ID}"
161
+ return _result_card(label, prob_fake, note=note)
162
+
163
+ # Fallback heuristic (if HF model failed)
164
+ face = face.resize((512, 512))
165
+ _, ela_mean = error_level_analysis(face, quality=90)
166
+ _, hf_ratio = fft_high_freq_ratio(face)
167
+ _, noi_score = noise_inconsistency(face)
168
+ label, conf = combine_scores(ela_mean, hf_ratio, noi_score)
169
+ return _result_card(label, conf, note="Heuristic fallback")
170
+
171
# ====== UI ======
CUSTOM_CSS = """
.gradio-container {max-width: 980px !important;}
.sleek-card {
  border: 1px solid #e5e7eb; border-radius: 16px; background: #fff;
  box-shadow: 0 2px 10px rgba(16,24,40,.04); padding: 18px;
}
"""

# Two-column layout: card-styled uploader on the left, result card on the right.
with gr.Blocks(title="Deepfake Detector (Pretrained HF Model)", css=CUSTOM_CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "<h2 style='text-align:center;margin-bottom:6px;'>Deepfake Detector</h2>"
        "<p style='text-align:center;color:#6b7280;'>Face-crop → pretrained classifier → single likelihood.</p>"
    )
    with gr.Row():
        with gr.Column(scale=6, elem_classes=["sleek-card"]):
            inp = gr.Image(
                type="pil",
                label="Upload / Paste Image",
                sources=["upload", "webcam", "clipboard"],
                height=420,
                show_label=True,
                interactive=True,
            )
            btn = gr.Button("Analyze", variant="primary", size="lg")
        with gr.Column(scale=6):
            out = gr.HTML()
    # Run on explicit click and whenever a new image is provided.
    btn.click(analyze, inputs=inp, outputs=out)
    inp.change(analyze, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()