Spaces:

MetaQu
/

ai-image-detector

Sleeping

File size: 8,053 Bytes

# app.py
import gradio as gr
from transformers import pipeline
from PIL import Image, ExifTags
import numpy as np

# OpenCV (opencv-headless) is optional. If importing it fails for any reason,
# `cv2` is bound to None and HAS_CV2 is False so callers can branch to their
# NumPy-only code paths.
try:
    import cv2
except Exception:
    cv2 = None
HAS_CV2 = cv2 is not None

# ------------------------
# Load HF detector (may require torch installed in requirements.txt)
# ------------------------
# Best-effort load: start from "no detector" and only replace it when the
# pipeline is constructed successfully; any failure is reported on stdout.
hf_detector = None
try:
    hf_detector = pipeline("image-classification", model="umm-maybe/AI-image-detector")
except Exception as load_error:
    print("Warning: hf_detector gagal dimuat:", load_error)

# ------------------------
# Forensic helper functions (works with or without cv2)
# ------------------------
def pil_to_gray_array(img: Image.Image):
    """Return *img* converted to mode "L" as a float32 NumPy array."""
    grayscale = img.convert("L")
    return np.array(grayscale, dtype=np.float32)

def estimate_blur(img: Image.Image):
    """Return the variance of the image Laplacian as a sharpness measure.

    Higher values mean more fine detail; very smooth images give values
    close to zero.

    Args:
        img: Image to analyse; it is converted to grayscale first.

    Returns:
        Variance of the Laplacian response as a Python float.
    """
    arr = pil_to_gray_array(img)
    if HAS_CV2:
        return float(cv2.Laplacian(arr.astype(np.uint8), cv2.CV_64F).var())

    # Fallback without OpenCV: apply the 4-neighbour Laplacian
    # [[0, 1, 0], [1, -4, 1], [0, 1, 0]] with mirrored borders, which is the
    # 3x3 aperture OpenCV documents for its default ksize=1. Using the same
    # operator keeps both branches on the same numeric scale, and unlike
    # np.gradient it does not fail on images with a side shorter than 2 px.
    padded = np.pad(arr.astype(np.float64), 1, mode="reflect")
    laplacian = (
        padded[:-2, 1:-1]
        + padded[2:, 1:-1]
        + padded[1:-1, :-2]
        + padded[1:-1, 2:]
        - 4.0 * padded[1:-1, 1:-1]
    )
    return float(laplacian.var())

def estimate_noise(img: Image.Image):
    """Return the standard deviation of the image's high-pass residual.

    The residual is the grayscale image minus a 5x5 low-pass filtered copy,
    so large-scale structure is removed and mostly pixel-level variation
    remains.

    Args:
        img: Image to analyse; it is converted to grayscale first.

    Returns:
        Standard deviation of the residual as a Python float.
    """
    arr = pil_to_gray_array(img)
    if HAS_CV2:
        # remove low-frequency content via Gaussian blur, then take the std
        blurred = cv2.GaussianBlur(arr, (5, 5), 0)
        return float(np.std(arr - blurred))

    # Fallback without OpenCV: separable 5-tap binomial low-pass
    # ([1, 4, 6, 4, 1] / 16) with mirrored borders, chosen to approximate the
    # Gaussian used above. Subtracting only the global mean (the previous
    # fallback) measured overall contrast rather than noise.
    taps = (1.0, 4.0, 6.0, 4.0, 1.0)
    norm = 16.0
    rows, cols = arr.shape
    padded = np.pad(arr, 2, mode="reflect")
    vertical = sum(t * padded[k:k + rows, :] for k, t in enumerate(taps)) / norm
    blurred = sum(t * vertical[:, k:k + cols] for k, t in enumerate(taps)) / norm
    return float(np.std(arr - blurred))

def block_highfreq_ratio(img: Image.Image, block=8):
    """Return the share of block-FFT magnitude outside the lowest frequencies.

    The grayscale image is split into ``block`` x ``block`` tiles (after
    reflect-padding each side up to a multiple of ``block``). For every tile
    the 2-D FFT magnitude is summed; the top-left 2x2 coefficients of the
    unshifted spectrum (the DC term and the first non-negative frequency in
    each direction) are treated as "low frequency".

    Args:
        img: Image to analyse; it is converted to grayscale first.
        block: Tile edge length in pixels.

    Returns:
        ``(total - low) / total`` as a float in 0..1, or 0.0 when the total
        magnitude is effectively zero (e.g. an all-black image).
    """
    arr = pil_to_gray_array(img)
    h, w = arr.shape
    # pad bottom/right so both sides become multiples of `block`
    ph = (-h) % block
    pw = (-w) % block
    if ph or pw:
        arr = np.pad(arr, ((0, ph), (0, pw)), mode='reflect')
    H, W = arr.shape

    total_energy = 0.0
    low_energy = 0.0
    # Process one strip of tiles per iteration: every tile in the strip goes
    # through a single batched FFT call instead of one Python-level call per
    # tile, while peak memory stays proportional to one strip.
    for top in range(0, H, block):
        strip = arr[top:top + block]
        # (block, W) -> (tiles_in_strip, block, block)
        tiles = strip.reshape(block, W // block, block).swapaxes(0, 1)
        mag = np.abs(np.fft.fft2(tiles))  # FFT over the last two axes
        # accumulate in float64 regardless of the FFT's working precision
        total_energy += float(mag.sum(dtype=np.float64))
        low_energy += float(mag[:, 0:2, 0:2].sum(dtype=np.float64))

    if total_energy <= 1e-9:
        return 0.0
    return float((total_energy - low_energy) / total_energy)  # 0..1

def edge_std(img: Image.Image):
    """Return the standard deviation of an edge response of *img*.

    With OpenCV available the response is the Canny edge map (thresholds
    100 and 200); otherwise it is the gradient magnitude computed with NumPy.
    """
    gray = pil_to_gray_array(img)
    if not HAS_CV2:
        # NumPy-only path: per-pixel gradient magnitude
        d_rows, d_cols = np.gradient(gray)
        magnitude = np.hypot(d_rows, d_cols)
        return float(np.std(magnitude))
    edge_map = cv2.Canny(gray.astype(np.uint8), 100, 200)
    return float(np.std(edge_map))

def has_camera_exif(img: Image.Image):
    """Report whether *img* carries EXIF tags that a camera typically writes.

    Args:
        img: Image whose EXIF metadata is inspected.

    Returns:
        True if any of the tags Make, Model, LensModel, FNumber or
        ExposureTime is present; False if none is present, if the image has
        no EXIF data, or if the metadata cannot be read.
    """
    camera_tags = ("Make", "Model", "LensModel", "FNumber", "ExposureTime")
    try:
        # Prefer the private `_getexif` where the image class provides it,
        # as the original code did; otherwise fall back to the public
        # `getexif()` so images without `_getexif` are still inspected
        # instead of being reported as "no EXIF".
        legacy_reader = getattr(img, "_getexif", None)
        exif = legacy_reader() if callable(legacy_reader) else img.getexif()
        if not exif:
            return False
        return any(ExifTags.TAGS.get(tag, tag) in camera_tags for tag in exif)
    except Exception:
        # Best-effort: unreadable or corrupt metadata counts as "no EXIF".
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        return False

# ------------------------
# Scoring / Ensemble logic
# ------------------------
def _clamp01(value):
    """Clamp *value* to the closed interval [0.0, 1.0]."""
    return max(0.0, min(1.0, value))


def final_ai_score_from_components(hf_label, hf_conf, blur, noise, hfreq_ratio, edges, exif_present):
    """Combine classifier output and forensic measures into one AI score.

    Args:
        hf_label: Top label from the image classifier, or None when no
            classifier result is available.
        hf_conf: Confidence of ``hf_label`` in 0..1.
        blur: Sharpness measure; values >= 500 contribute nothing.
        noise: Noise measure; values >= 100 contribute nothing.
        hfreq_ratio: High-frequency ratio; values >= 0.2 contribute nothing.
        edges: Edge measure; values >= 30 contribute nothing.
        exif_present: Whether camera EXIF tags were found.

    Returns:
        A tuple ``(combined, components)`` where ``combined`` is the final
        score in 0..100 (larger = more likely AI) and ``components`` is a
        dict with the keys ``hf_contrib``, ``forensic_score``,
        ``noise_score``, ``blur_score``, ``hfreq_score`` and ``edge_score``.
    """
    # 1) Classifier contribution (0..100)
    if hf_label is None:
        hf_contrib = 0.0
    else:
        lab = hf_label.lower()
        if any(x in lab for x in ("fake", "artificial", "ai")):
            hf_contrib = hf_conf * 100.0
        elif "human" in lab or "real" in lab:
            # do not trust 'human' fully; translate into a moderate AI signal
            hf_contrib = (1.0 - hf_conf) * 100.0 * 0.6
        else:
            hf_contrib = (1.0 - hf_conf) * 100.0 * 0.8

    # 2) Forensic contributions, each 0..100 where larger = more likely AI.
    # Low noise, low Laplacian variance, little high-frequency energy and a
    # weak edge response are all treated as AI-ish.
    noise_score = _clamp01(1.0 - noise / 100.0) * 100.0
    blur_score = _clamp01(1.0 - blur / 500.0) * 100.0
    # Normalise by the 0.2 threshold *before* clamping so the score cannot
    # leave 0..100 even for out-of-range input.
    hfreq_score = _clamp01((0.2 - hfreq_ratio) / 0.2) * 100.0
    edge_score = _clamp01(1.0 - edges / 30.0) * 100.0

    # weights can be tuned
    forensic_score = (0.35 * noise_score + 0.30 * blur_score + 0.20 * hfreq_score + 0.15 * edge_score)

    # 3) Combine classifier + forensic. Without a classifier result the
    # forensic score carries the full weight; otherwise the missing
    # classifier would count as 60% "real" evidence and cap the result at 40.
    if hf_label is None:
        combined = forensic_score
    else:
        combined = 0.6 * hf_contrib + 0.4 * forensic_score  # 0..100

    # 4) EXIF adjustment: camera metadata lowers the AI score significantly
    if exif_present:
        combined = max(0.0, combined - 30.0)

    # Clamp
    combined = max(0.0, min(100.0, combined))
    return combined, {
        "hf_contrib": hf_contrib,
        "forensic_score": forensic_score,
        "noise_score": noise_score,
        "blur_score": blur_score,
        "hfreq_score": hfreq_score,
        "edge_score": edge_score
    }

# ------------------------
# Main detect function
# ------------------------
def detect_image(img: Image.Image):
    """Analyse an image and return a Markdown report with an AI/real verdict.

    Runs the image classifier (when one is loaded) and the forensic helpers,
    combines them via ``final_ai_score_from_components`` and formats the
    result.

    Args:
        img: A PIL image, any array-like that ``np.array`` /
            ``Image.fromarray`` can convert, or None when nothing was
            submitted.

    Returns:
        A Markdown string: the detection report, or a short warning message
        when no image was provided or an error occurred.
    """
    # The UI may invoke the handler without an image (presumably as None);
    # answer with a clear prompt instead of a conversion error.
    if img is None:
        return "⚠️ Tidak ada gambar. Silakan unggah gambar terlebih dahulu."

    try:
        # ensure PIL image
        if not isinstance(img, Image.Image):
            img = Image.fromarray(np.array(img))

        # Classifier inference (if available)
        hf_label = None
        hf_conf = 0.0
        if hf_detector is not None:
            try:
                res = hf_detector(img, top_k=1)
                if isinstance(res, list) and len(res) > 0:
                    hf_label = res[0].get("label", "")
                    hf_conf = float(res[0].get("score", 0.0))
            except Exception as e:
                # Best-effort: continue with the forensic measures only, but
                # leave a trace of the failure instead of hiding it.
                print("Warning: inferensi hf_detector gagal:", e)
                hf_label = None
                hf_conf = 0.0

        # Forensic measures
        blur = estimate_blur(img)
        noise = estimate_noise(img)
        hfreq = block_highfreq_ratio(img)
        edges = edge_std(img)
        exif_ok = has_camera_exif(img)

        # Compute final AI score (0..100)
        ai_score, comps = final_ai_score_from_components(hf_label, hf_conf, blur, noise, hfreq, edges, exif_ok)
        real_score = round(100.0 - ai_score, 2)
        ai_score = round(ai_score, 2)

        # Interpretations / labels
        if ai_score >= 90:
            verdict = "🤖 Gambar ini TERLALU MOGOK: Hasil AI (sangat tinggi)"
        elif ai_score >= 60:
            verdict = "🤖 Gambar ini kemungkinan besar DIHASILKAN AI"
        elif ai_score <= 15:
            verdict = "✅ Gambar ini tampak ASLI (sangat tinggi)"
        elif ai_score <= 40:
            verdict = "✅ Gambar ini kemungkinan besar ASLI"
        else:
            verdict = f"⚖️ Gambar ini {ai_score}% AI / {real_score}% Asli"

        # Build output message
        out = f"""
### Hasil Deteksi:
{verdict}

**Persentase:** {ai_score}% AI  /  {real_score}% Asli

**Model Prediksi:** {hf_label if hf_label else 'N/A'} ({hf_conf:.2f})
**Forensik (angka):**
- Blur (var Laplacian / grad-var): {blur:.2f}
- Noise (std highpass): {noise:.2f}
- HighFreq Ratio (block FFT): {hfreq:.3f}
- Edge STD: {edges:.2f}
- EXIF Kamera: {"Ada" if exif_ok else "Tidak"}

**Komponen skor (internal):**
- hf_contrib: {comps['hf_contrib']:.2f}
- forensic_score: {comps['forensic_score']:.2f}
- noise_score: {comps['noise_score']:.2f}
- blur_score: {comps['blur_score']:.2f}
- hfreq_score: {comps['hfreq_score']:.2f}
- edge_score: {comps['edge_score']:.2f}
"""
        return out
    except Exception as e:
        # Top-level boundary for the UI: report the failure as text rather
        # than letting the request crash.
        return f"⚠️ Terjadi error saat deteksi: {str(e)}"

# Gradio UI: one image input, requested as a PIL image (type="pil"), wired to
# detect_image, whose returned string is rendered as Markdown.
iface = gr.Interface(
    fn=detect_image,
    inputs=gr.Image(type="pil"),
    outputs="markdown",
    title="Improved Hybrid AI vs Real Detector",
    description="Gabungan model HF + forensik (noise, blur, DCT/FFT block, edge, EXIF). Tidak ada jaminan 100%."
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()