vaniv commited on
Commit
ad7b882
·
verified ·
1 Parent(s): 34aff01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +229 -109
app.py CHANGED
@@ -1,113 +1,233 @@
1
- import os
2
- import typing as t
3
-
4
- import gradio as gr
5
  import numpy as np
6
- import tensorflow as tf
7
- from tensorflow.keras.layers import (Input, Conv2D, BatchNormalization,
8
- MaxPooling2D, Flatten, Dropout, Dense, LeakyReLU)
9
- from tensorflow.keras.models import Model
10
- from PIL import Image
11
-
12
- # Paths
13
- CUSTOM_MODEL_PATH = "model.h5" # optional: full Keras model
14
- MESO_WEIGHTS_PATH = "weights/Meso4_DF.weights.h5" # your weights-only file
15
- LABELS = ["real", "fake"] # index 0..1 (we'll compute both scores)
16
-
17
- # Globals
18
- MODEL: t.Optional[tf.keras.Model] = None
19
- IS_MESO = False
20
- TARGET_SIZE = (256, 256) # your notebook used 256×256
21
- THRESHOLD = 0.5 # sigmoid > 0.5 => fake
22
-
23
def build_meso4() -> tf.keras.Model:
    """Build the Meso4 CNN for binary deepfake detection.

    Four Conv → BatchNorm → MaxPool stages followed by a small dense head
    ending in a single sigmoid unit; per THRESHOLD above, sigmoid > 0.5
    is interpreted as "fake".
    """
    # Input: RGB image at TARGET_SIZE (256x256 per the constant above).
    x = Input(shape=(TARGET_SIZE[0], TARGET_SIZE[1], 3))
    x1 = Conv2D(8, (3, 3), padding='same', activation='relu')(x)
    x1 = BatchNormalization()(x1)
    x1 = MaxPooling2D(pool_size=(2, 2), padding='same')(x1)

    x2 = Conv2D(8, (5, 5), padding='same', activation='relu')(x1)
    x2 = BatchNormalization()(x2)
    x2 = MaxPooling2D(pool_size=(2, 2), padding='same')(x2)

    x3 = Conv2D(16, (5, 5), padding='same', activation='relu')(x2)
    x3 = BatchNormalization()(x3)
    x3 = MaxPooling2D(pool_size=(2, 2), padding='same')(x3)

    x4 = Conv2D(16, (5, 5), padding='same', activation='relu')(x3)
    x4 = BatchNormalization()(x4)
    # Larger (4x4) pooling on the last stage shrinks the feature map
    # aggressively before flattening.
    x4 = MaxPooling2D(pool_size=(4, 4), padding='same')(x4)

    y = Flatten()(x4)
    y = Dropout(0.5)(y)
    y = Dense(16)(y)
    y = LeakyReLU(alpha=0.1)(y)
    y = Dropout(0.5)(y)
    # Single sigmoid output: score toward 1.0 means "fake" (see THRESHOLD).
    y = Dense(1, activation='sigmoid')(y)

    model = Model(inputs=x, outputs=y)
    # Compiled for completeness; inference-only use does not need it.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
51
-
52
def _load_model():
    """Load a full Keras model if present; otherwise build Meso4 and load weights.

    Sets the module globals MODEL and IS_MESO as a side effect.

    Raises:
        RuntimeError: when neither CUSTOM_MODEL_PATH nor MESO_WEIGHTS_PATH
            exists (deliberately no silent fallback).
    """
    global MODEL, IS_MESO
    # 1) Full model (optional)
    if os.path.exists(CUSTOM_MODEL_PATH):
        try:
            MODEL = tf.keras.models.load_model(CUSTOM_MODEL_PATH, compile=False)
            IS_MESO = False
            print("Loaded custom model from model.h5")
            return
        except Exception as e:
            # Best-effort: log and fall through to the weights-only path.
            print("Failed to load model.h5:", e)

    # 2) Meso4 + weights
    if os.path.exists(MESO_WEIGHTS_PATH):
        MODEL = build_meso4()
        MODEL.load_weights(MESO_WEIGHTS_PATH)
        IS_MESO = True
        print("Loaded Meso4 with weights:", MESO_WEIGHTS_PATH)
        return

    # 3) Hard fail (don't silently switch to ImageNet; this is a deepfake app).
    # Fix: the previous message said "weights/Meso4_DF", which does not match
    # MESO_WEIGHTS_PATH ("weights/Meso4_DF.weights.h5") — reference the actual
    # configured paths so the user uploads the right files.
    raise RuntimeError(
        f"No model found. Upload either {CUSTOM_MODEL_PATH} or "
        f"{MESO_WEIGHTS_PATH} to the Space."
    )
77
-
78
def _preprocess(img: Image.Image) -> np.ndarray:
    """Convert to RGB, resize to TARGET_SIZE, scale to [0, 1], add batch axis.

    Returns a float32 array of shape (1, H, W, 3) ready for model.predict.
    """
    rgb = img.convert("RGB").resize(TARGET_SIZE)
    scaled = np.asarray(rgb, dtype="float32") / 255.0
    return scaled[np.newaxis, ...]
82
-
83
def predict(image: Image.Image):
    """Classify an uploaded image as real vs fake with the loaded model.

    Returns:
        (scores, image, msg): a dict with both class probabilities for the
        gr.Label component, the original image for preview, and a status string.
    """
    if image is None:
        # Graceful empty-input response; same arity as the success path.
        return {"real": 0.0, "fake": 0.0}, None, "Upload an image."
    x = _preprocess(image)
    # Single sigmoid output interpreted as P(fake) — see THRESHOLD comment above.
    prob_fake = float(MODEL.predict(x, verbose=0)[0][0])
    prob_real = 1.0 - prob_fake
    label = "fake" if prob_fake >= THRESHOLD else "real"
    msg = f"Prediction: {label.upper()} | fake={prob_fake:.2f}, real={prob_real:.2f}"
    # Return both scores for the Label component
    return {"real": prob_real, "fake": prob_fake}, image, msg
93
-
94
# Init
# Load the model once at import time; raises RuntimeError if no artifact exists.
_load_model()

with gr.Blocks(title="Deepfake Detector (Meso4)") as demo:
    gr.Markdown("# Deepfake Detector (Meso4)\n"
                "Upload a face image (or a frame from a video). The model outputs real vs fake.")

    with gr.Row():
        with gr.Column(scale=1):
            inp = gr.Image(type="pil", label="Upload image")
            btn = gr.Button("Predict")
        with gr.Column(scale=1):
            out_label = gr.Label(num_top_classes=2, label="Scores")
            out_img = gr.Image(type="pil", label="Preview")
            out_text = gr.Markdown()

    # Output order must match predict's 3-tuple: (scores, image, msg).
    btn.click(fn=predict, inputs=inp, outputs=[out_label, out_img, out_text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  if __name__ == "__main__":
113
  demo.launch()
 
1
+ import io, os, tempfile, math
 
 
 
2
  import numpy as np
3
+ import gradio as gr
4
+ from PIL import Image, ImageChops, ImageFilter
5
+ import cv2
6
+ from skimage import exposure
7
+
8
+ # ---------- Forensic primitives ----------
9
+
10
def error_level_analysis(pil_img: Image.Image, quality: int = 90):
    """
    Error Level Analysis (ELA).

    Re-encode the image as JPEG at the given quality, diff against the
    original, and linearly amplify the diff so compression anomalies become
    visible.

    Args:
        pil_img: input image (any mode; converted to RGB internally).
        quality: JPEG quality for the re-encode pass.

    Returns:
        (ela_image, mean_intensity): amplified diff as a PIL image, and the
        mean amplified-diff intensity normalized to [0, 1].
    """
    img = pil_img.convert("RGB")

    # Round-trip through an in-memory JPEG encode/decode.
    with io.BytesIO() as buffer:
        img.save(buffer, "JPEG", quality=quality)
        buffer.seek(0)
        # .convert() forces a full pixel load before the buffer closes.
        comp = Image.open(buffer).convert("RGB")

    diff = ImageChops.difference(img, comp)

    # Amplify so the largest per-band difference maps to 255.
    # getextrema() yields one (min, max) pair per band for RGB images.
    extrema = diff.getextrema()
    max_diff = max(m for (_, m) in extrema)
    scale = 255.0 / max(1, max_diff)  # guard divide-by-zero for identical images

    # Fix: scale with NumPy directly instead of calling the module-level
    # `ImageEnhance` helper, whose name shadows PIL's ImageEnhance module
    # and invites confusion; the math is identical (multiply, clip, uint8).
    ela_np = np.clip(np.array(diff).astype("float32") * scale, 0, 255).astype("uint8")
    ela = Image.fromarray(ela_np)

    mean_intensity = float(ela_np.mean() / 255.0)
    return ela, mean_intensity
30
+
31
def ImageEnhance(pil_img: Image.Image, scale: float):
    """Multiply pixel values by `scale`, clip to [0, 255], return a new PIL image.

    NOTE(review): the name shadows PIL's `ImageEnhance` module (that module is
    not imported in this file, so there is no clash today) and is not
    snake_case; renaming would change the module's public surface, so it is
    left as-is.
    """
    arr = np.array(pil_img).astype("float32") * scale
    arr = np.clip(arr, 0, 255).astype("uint8")
    return Image.fromarray(arr)
35
+
36
def fft_high_freq_ratio(pil_img: Image.Image):
    """
    High-frequency energy ratio of the grayscale FFT magnitude.

    Args:
        pil_img: input image (converted to grayscale internally).

    Returns:
        (spectrum_image, hf_ratio): log-magnitude spectrum as a PIL image,
        and the fraction of spectral energy outside a small central
        low-frequency disc (approximately in [0, 1]).
    """
    gray = np.array(pil_img.convert("L"), dtype=np.float32) / 255.0
    h, w = gray.shape

    # 2-D Hann window to reduce edge artifacts that would otherwise
    # smear energy across the spectrum.
    win_y = np.hanning(h)[:, None]
    win_x = np.hanning(w)[None, :]
    grayw = gray * (win_y * win_x)

    F = np.fft.fftshift(np.fft.fft2(grayw))
    mag = np.log1p(np.abs(F))

    # Normalized spectrum for display.
    # Fix: guard the constant-image case where mag.max() == 0 — dividing
    # by zero produced NaNs and garbage uint8 output.
    peak = mag.max()
    if peak > 0:
        spec = (mag / peak * 255).astype("uint8")
    else:
        spec = np.zeros((h, w), dtype="uint8")
    spec_img = Image.fromarray(spec)

    # Split energy: low-frequency disc around the (shifted) DC vs. the rest.
    cy, cx = h // 2, w // 2
    yy, xx = np.ogrid[:h, :w]
    dist = np.sqrt((yy - cy) ** 2 + (xx - cx) ** 2)
    r_low = min(h, w) * 0.08  # low-frequency radius (empirical)
    mask_low = dist <= r_low
    low_energy = mag[mask_low].sum()
    high_energy = mag[~mask_low].sum()
    hf_ratio = float(high_energy / (high_energy + low_energy + 1e-9))
    return spec_img, hf_ratio
64
+
65
def noise_map_score(pil_img: Image.Image):
    """
    Laplacian-variance map as a proxy for local sharpness / noise consistency.

    Args:
        pil_img: input image (converted to grayscale internally).

    Returns:
        (heatmap, score): contrast-equalized |Laplacian| heatmap as a PIL
        image, and an inconsistency score squashed to approximately [0, 1]
        (higher = more tile-to-tile variation).
    """
    img = np.array(pil_img.convert("L"))
    lap = cv2.Laplacian(img, cv2.CV_32F, ksize=3)

    # Display heatmap: normalize |Laplacian|, then local contrast-equalize.
    lap_abs = np.abs(lap)
    heat = (lap_abs / (lap_abs.max() + 1e-9) * 255).astype("uint8")
    heat_eq = exposure.equalize_adapthist(heat, clip_limit=0.01)  # float in [0, 1]
    heat_disp = Image.fromarray((heat_eq * 255).astype("uint8"))

    # Inconsistency: coefficient of variation of per-tile Laplacian variance.
    # NumPy slicing already clamps out-of-range stops, so y:y+tile is safe
    # at the borders.
    tile = 32
    H, W = img.shape
    vars_ = np.array(
        [
            lap_abs[y:y + tile, x:x + tile].var()
            for y in range(0, H, tile)
            for x in range(0, W, tile)
            if lap_abs[y:y + tile, x:x + tile].size > 0
        ],
        dtype=np.float32,
    )
    # Fix: an empty tile list (degenerate zero-area image) previously made
    # np.mean/np.std return NaN with a RuntimeWarning.
    if vars_.size == 0:
        return heat_disp, 0.0

    score = float(vars_.std() / (vars_.mean() + 1e-9))  # higher = more inconsistent
    # squash to approx [0,1]
    score_norm = float(np.tanh(score / 5.0))
    return heat_disp, score_norm
92
+
93
+ # ---------- Simple decision rule ----------
94
+
95
def combine_scores(ela_mean, hf_ratio, noise_incons):
    """
    Fuse the three forensic signals into one manipulation confidence.

    Each raw signal is squashed into [0, 1], then combined as a weighted
    average; weights and the 0.55 decision threshold are tuned
    conservatively to avoid false alarms on clean photos.

    Returns:
        (label, suspect): "Likely Manipulated" / "Likely Authentic" plus
        the fused confidence in [0, 1].
    """
    weights = (0.4, 0.35, 0.25)  # tweakable

    # Rough per-signal normalization to [0, 1].
    normalized = (
        np.clip(ela_mean * 2.5, 0, 1),            # stronger ELA response -> more suspect
        np.clip((hf_ratio - 0.65) / 0.25, 0, 1),  # excess high-frequency energy -> suspect
        np.clip(noise_incons, 0, 1),
    )

    suspect = float(sum(w * s for w, s in zip(weights, normalized)))
    label = "Likely Manipulated" if suspect >= 0.55 else "Likely Authentic"
    return label, suspect
110
+
111
+ # ---------- Gradio handlers ----------
112
+
113
def analyze_image(pil_img: Image.Image):
    """
    Run the three forensic checks on one image and package the UI outputs.

    Returns a 6-tuple matching the Image tab's output components:
        (scores_dict, normalized_input, ela_image, spectrum_image,
         noise_heatmap, markdown_message)
    """
    if pil_img is None:
        # Fix: the empty-input branch previously returned only 5 values while
        # the success path (and the 6-element outputs list wired in the UI)
        # expects 6 — the message would have landed in the wrong component.
        return {}, None, None, None, None, "Upload an image"

    # Standardize size for stable, comparable scores.
    # NOTE: this is a plain resize — aspect ratio is NOT preserved.
    pil_img = pil_img.convert("RGB")
    pil_img = pil_img.resize((512, 512))

    ela_img, ela_mean = error_level_analysis(pil_img, quality=90)
    spec_img, hf_ratio = fft_high_freq_ratio(pil_img)
    noise_img, noise_incons = noise_map_score(pil_img)

    label, conf = combine_scores(ela_mean, hf_ratio, noise_incons)
    scores = {
        "Confidence manipulated": round(conf, 3),
        "ELA mean": round(ela_mean, 3),
        "HF ratio": round(hf_ratio, 3),
        "Noise inconsistency": round(noise_incons, 3)
    }
    msg = f"Result: **{label}** — confidence: {conf:.2f}\n\n" \
          f"*ELA={ela_mean:.3f}, HF={hf_ratio:.3f}, Noise={noise_incons:.3f}*"

    return scores, pil_img, ela_img, spec_img, noise_img, msg
136
+
137
def analyze_video(video_file):
    """
    Sample up to 8 frames (every 15th) from a short video, run the image
    checks on each, and average the scores; visual evidence is rendered
    from the first sampled frame.

    Args:
        video_file: a filesystem path (modern Gradio gr.Video passes a str),
            an object with a .name path attribute, or a file-like object
            exposing .read().

    Returns a 6-tuple matching the Video tab's output components.
    """
    if video_file is None:
        return {}, None, None, None, None, "Upload a short video (<= 10–15s)"

    # Fix: gr.Video supplies a filepath string in current Gradio, so the
    # previous unconditional video_file.read() crashed. Resolve to a path;
    # only raw file-like payloads are copied to a temp file, and only that
    # temp file is deleted afterwards (never a user-supplied path).
    cleanup_path = None
    if isinstance(video_file, str):
        path = video_file
    elif isinstance(getattr(video_file, "name", None), str):
        path = video_file.name
    else:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
        tmp.write(video_file.read()); tmp.flush(); tmp.close()
        path = cleanup_path = tmp.name

    cap = cv2.VideoCapture(path)
    frames = []
    idx = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if idx % 15 == 0:  # sample every 15th frame
            # OpenCV decodes BGR; convert to RGB for PIL.
            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
            if len(frames) >= 8:
                break
        idx += 1
    cap.release()
    if cleanup_path:
        os.unlink(cleanup_path)

    if not frames:
        return {}, None, None, None, None, "Couldn’t read frames; try a different/shorter video."

    # Analyze the first frame for visuals, average scores across all frames.
    vis_sample = frames[0].resize((512, 512)).convert("RGB")

    ela_img, ela_mean = error_level_analysis(vis_sample)
    spec_img, hf_ratio = fft_high_freq_ratio(vis_sample)
    noise_img, noise_incons = noise_map_score(vis_sample)

    # Accumulate per-frame scores (visual outputs of later frames discarded).
    elas, hfs, noises = [ela_mean], [hf_ratio], [noise_incons]
    for f in frames[1:]:
        f = f.resize((512, 512)).convert("RGB")
        _, em = error_level_analysis(f)
        _, hr = fft_high_freq_ratio(f)
        _, ns = noise_map_score(f)
        elas.append(em); hfs.append(hr); noises.append(ns)

    ela_m = float(np.mean(elas))
    hf_m = float(np.mean(hfs))
    noi_m = float(np.mean(noises))
    label, conf = combine_scores(ela_m, hf_m, noi_m)

    scores = {
        "Confidence manipulated": round(conf, 3),
        "ELA mean (avg)": round(ela_m, 3),
        "HF ratio (avg)": round(hf_m, 3),
        "Noise inconsistency (avg)": round(noi_m, 3)
    }
    msg = f"Result: **{label}** — confidence: {conf:.2f}\n\n" \
          f"*ELA={ela_m:.3f}, HF={hf_m:.3f}, Noise={noi_m:.3f}*\n" \
          f"_Note: rule-based (no ML), indicative only._"

    return scores, vis_sample, ela_img, spec_img, noise_img, msg
193
+
194
+ # ---------- UI ----------
195
+
196
with gr.Blocks(title="Deepfake Forensics (No-ML)") as demo:
    # Header explaining the three rule-based checks.
    gr.Markdown("## Deepfake Forensics (No-ML)\n"
                "Upload an **image** or a short **video**. We run three classical forensic checks:\n"
                "- **ELA** (Error Level Analysis)\n- **Frequency Spectrum** (high-freq energy)\n- **Noise Consistency** (Laplacian map)\n"
                "Outputs a **Likely Authentic / Likely Manipulated** decision with visual evidence.")

    with gr.Tab("Image"):
        with gr.Row():
            with gr.Column(scale=1):
                img_in = gr.Image(type="pil", label="Upload image")
                btn = gr.Button("Analyze")
            with gr.Column(scale=2):
                scores = gr.Label(label="Scores")
                img_std = gr.Image(label="Normalized Input")
                img_ela = gr.Image(label="ELA Heatmap")
                img_fft = gr.Image(label="Frequency Spectrum")
                img_noise = gr.Image(label="Noise/Sharpness Map")
                msg = gr.Markdown()
        # Output order must match analyze_image's 6-tuple.
        btn.click(analyze_image, inputs=img_in,
                  outputs=[scores, img_std, img_ela, img_fft, img_noise, msg])

    with gr.Tab("Video (optional)"):
        with gr.Row():
            with gr.Column(scale=1):
                vid_in = gr.Video(label="Upload short MP4 (<=10–15s)")
                btnv = gr.Button("Analyze Video")
            with gr.Column(scale=2):
                vscores = gr.Label(label="Scores (avg over frames)")
                vimg_std = gr.Image(label="Frame Preview")
                vimg_ela = gr.Image(label="ELA Heatmap (frame)")
                vimg_fft = gr.Image(label="Frequency Spectrum (frame)")
                vimg_noise = gr.Image(label="Noise/Sharpness Map (frame)")
                vmsg = gr.Markdown()
        # Output order must match analyze_video's 6-tuple.
        btnv.click(analyze_video, inputs=vid_in,
                   outputs=[vscores, vimg_std, vimg_ela, vimg_fft, vimg_noise, vmsg])
231
 
232
if __name__ == "__main__":
    # Start the Gradio server only when run as a script (not on import).
    demo.launch()