# RightINFO / image_detector.py
# anshul-rohilla4
# fix nyuad local path and public host
# c441b91
# ============================================================
# AI Image Detection Module
# Models: dima806 (primary) + umm-maybe (secondary) + NYUAD (fallback)
# Physics: FFT frequency analysis + Noise analysis
# ============================================================
#
# SETUP:
# pip install torch torchvision transformers pillow numpy requests
# pip install beautifulsoup4 opencv-python-headless scikit-learn
#
# USAGE:
# from image_detector import predict_image, evaluate_dataset
# ============================================================
import os
import numpy as np
import torch
import requests
import cv2
from PIL import Image
from io import BytesIO
from transformers import (
AutoModelForImageClassification,
ViTImageProcessor,
pipeline
)
# ============================================================
# MODEL LOADING
# ============================================================
print("Loading image detection models...")

# Device argument shared by both pipelines: CUDA device 0 when torch reports
# CUDA as available, otherwise -1 (CPU).
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

# Loading is best-effort: a model that fails to load is flagged unavailable
# and its handle is set to None, so the module still imports and the
# predictors below can skip it.

# ── Model 1: dima806 — primary, strong on general AI images ──
try:
    dima_pipe = pipeline(
        "image-classification",
        model="dima806/ai_vs_real_image_detection",
        device=_PIPELINE_DEVICE,
    )
    DIMA_AVAILABLE = True
    print("✓ dima806 loaded")
except Exception as e:
    dima_pipe = None
    print(f"✗ dima806 not available: {e}")
    DIMA_AVAILABLE = False

# ── Model 2: umm-maybe — strong on Midjourney/SDXL ───────────
try:
    umm_pipe = pipeline(
        "image-classification",
        model="umm-maybe/AI-image-detector",
        device=_PIPELINE_DEVICE,
    )
    UMM_AVAILABLE = True
    print("✓ umm-maybe loaded")
except Exception as e:
    umm_pipe = None
    print(f"✗ umm-maybe not available: {e}")
    UMM_AVAILABLE = False

# ── Model 3: NYUAD — fallback, trained on DALL-E + SD ────────
# Loaded from a directory next to this file, never from the network.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
NYUAD_PATH = os.path.join(BASE_DIR, "nyuad_model")
try:
    nyuad_processor = ViTImageProcessor.from_pretrained(NYUAD_PATH, local_files_only=True)
    # NOTE(review): trust_remote_code=True permits running Python code shipped
    # inside the model directory. Only keep it if NYUAD_PATH is trusted.
    nyuad_model = AutoModelForImageClassification.from_pretrained(
        NYUAD_PATH,
        trust_remote_code=True,
        local_files_only=True,
    )
    nyuad_model.eval()
    NYUAD_AVAILABLE = True
    print("✓ NYUAD loaded")
except Exception as e:
    nyuad_processor = None
    nyuad_model = None
    print(f"✗ NYUAD not available: {e}")
    NYUAD_AVAILABLE = False

print("Models ready.\n")
# ============================================================
# INDIVIDUAL MODEL PREDICTORS
# ============================================================
def predict_dima(image: Image.Image) -> dict | None:
    """
    dima806 — primary model.
    Best for: general AI images, news photos, portraits.

    Runs the dima806 pipeline on the RGB version of `image` and collapses its
    label/score list into one AI probability. The score of the first entry
    labelled FAKE / AI / ARTIFICIAL (case-insensitive) is used directly;
    if none exists, the score is 1 minus the first REAL / HUMAN entry
    (0.5 when that is missing too).

    Returns:
        None if the model is unavailable or inference raises; otherwise a
        dict with "model", "label" ("AI-generated" when ai_score >= 0.5,
        else "Real") and "ai_score" rounded to 4 decimals.
    """
    if not DIMA_AVAILABLE:
        return None

    fake_labels = ("FAKE", "AI", "ARTIFICIAL")
    real_labels = ("REAL", "HUMAN")
    try:
        predictions = dima_pipe(image.convert("RGB"))

        ai_score = None
        for entry in predictions:
            if entry["label"].upper() in fake_labels:
                ai_score = entry["score"]
                break

        if ai_score is None:
            # No explicit "fake" class: derive the score from the "real" class.
            real_score = 0.5
            for entry in predictions:
                if entry["label"].upper() in real_labels:
                    real_score = entry["score"]
                    break
            ai_score = 1 - real_score

        verdict = "AI-generated" if ai_score >= 0.5 else "Real"
        return {
            "model": "dima806",
            "label": verdict,
            "ai_score": round(float(ai_score), 4),
        }
    except Exception as e:
        print(f"dima806 error: {e}")
        return None
def predict_umm(image: Image.Image) -> dict | None:
    """
    umm-maybe — secondary model.
    Best for: Midjourney, SDXL, newer diffusion models.

    Runs the umm-maybe pipeline on the RGB version of `image`. The AI score is
    the score of the first entry labelled FAKE / AI / ARTIFICIAL / GENERATED
    (case-insensitive); failing that, 1 minus the first REAL / HUMAN score,
    with 0.5 assumed for "real" when neither label family is present.

    Returns:
        None if the model is unavailable or inference raises; otherwise a
        dict with "model", "label" and "ai_score" (rounded to 4 decimals).
    """
    if not UMM_AVAILABLE:
        return None

    try:
        predictions = umm_pipe(image.convert("RGB"))

        def first_score(accepted, fallback):
            # Score of the first prediction whose upper-cased label is accepted.
            for entry in predictions:
                if entry["label"].upper() in accepted:
                    return entry["score"]
            return fallback

        ai_score = first_score(["FAKE", "AI", "ARTIFICIAL", "GENERATED"], None)
        if ai_score is None:
            ai_score = 1 - first_score(["REAL", "HUMAN"], 0.5)

        if ai_score >= 0.5:
            verdict = "AI-generated"
        else:
            verdict = "Real"
        return {
            "model": "umm-maybe",
            "label": verdict,
            "ai_score": round(float(ai_score), 4),
        }
    except Exception as e:
        print(f"umm-maybe error: {e}")
        return None
def predict_nyuad(image: Image.Image) -> dict | None:
    """
    NYUAD ViT — fallback model.
    Best for: DALL-E, Stable Diffusion 1.x/2.x.

    Pre-processes the RGB image, runs the classifier without gradients and
    soft-maxes the logits into per-class probabilities keyed by the model's
    own id2label names.

    Returns:
        None if the model is unavailable or inference raises; otherwise a
        dict with:
          - "model": "NYUAD"
          - "label": "Real" when the top class is "real", else "AI-generated"
          - "ai_score": 1 minus the "real" probability (0 if that class is
            absent), rounded to 4 decimals
          - "scores": every class probability, rounded to 4 decimals
    """
    if not NYUAD_AVAILABLE:
        return None

    try:
        rgb = image.convert("RGB")
        batch = nyuad_processor(images=rgb, return_tensors="pt")
        with torch.no_grad():
            logits = nyuad_model(**batch).logits
        class_probs = torch.softmax(logits, dim=-1).squeeze().tolist()

        id2label = nyuad_model.config.id2label
        scores = {}
        for idx, prob in enumerate(class_probs):
            scores[id2label[idx]] = round(prob, 4)

        top_class = max(scores, key=scores.get)
        verdict = "Real" if top_class == "real" else "AI-generated"
        return {
            "model": "NYUAD",
            "label": verdict,
            "ai_score": round(1 - scores.get("real", 0), 4),
            "scores": scores,
        }
    except Exception as e:
        print(f"NYUAD error: {e}")
        return None
# ============================================================
# PHYSICS-BASED ANALYSIS
# ============================================================
def fft_analysis(image: Image.Image) -> dict | None:
    """
    FFT Frequency Analysis.

    Real photographs have a natural frequency falloff due to lens optics
    and sensor physics — high frequencies decay smoothly.
    AI images break this pattern:
    - Diffusion models produce unnatural high-frequency peaks
    - GAN images have characteristic checkerboard artifacts in frequency domain
    - Both tend to be unnaturally smooth in mid-frequencies
    This is generator-agnostic — it exploits the physics of real cameras,
    not model-specific artifacts.

    Four signals are read from the centred log-magnitude spectrum of the
    grayscale image and blended into one score:
      - center_ratio:  DC magnitude / mean magnitude        (weight 0.25)
      - hf_ratio:      mean of the four corner blocks / mean (weight 0.30)
      - mid_std:       std of the central half / overall std (weight 0.25)
      - non_monotonic: fraction of increasing steps in the
                       radially averaged profile             (weight 0.20)

    Returns:
        None when the image is too small for a radial profile (shorter side
        under 6 px) or when any step raises; otherwise a dict with "model",
        "label", "ai_score" (4 decimals) and the four raw signals (3 decimals).
    """
    try:
        gray = np.array(image.convert("L"), dtype=np.float32)
        spectrum = np.fft.fftshift(np.fft.fft2(gray))
        magnitude = np.log(np.abs(spectrum) + 1)
        h, w = magnitude.shape
        cy, cx = h // 2, w // 2
        max_r = min(cy, cx)

        # The falloff signal needs at least two radial bins (r = 1, 2).
        # Without them the original code produced a NaN score that then
        # propagated through the ensemble.
        if max_r < 3:
            print(f"FFT error: image too small for frequency analysis ({w}x{h})")
            return None

        # Central peak ratio — real photos have stronger center dominance
        mean_mag = magnitude.mean()
        center_ratio = float(magnitude[cy, cx] / (mean_mag + 1e-8))

        # High frequency corners — AI images leak more energy into corners.
        # `-h // 8` is floor(-h / 8), so the bottom/right blocks can be one
        # row/column larger than the top/left ones; kept as-is so scores
        # stay comparable with earlier runs.
        corners = np.concatenate([
            magnitude[:h // 8, :w // 8].flatten(),
            magnitude[:h // 8, -w // 8:].flatten(),
            magnitude[-h // 8:, :w // 8].flatten(),
            magnitude[-h // 8:, -w // 8:].flatten(),
        ])
        hf_ratio = float(corners.mean() / (mean_mag + 1e-8))

        # Mid-frequency uniformity — AI images are too smooth here
        mid_ring = magnitude[h // 4:3 * h // 4, w // 4:3 * w // 4]
        mid_std = float(mid_ring.std() / (magnitude.std() + 1e-8))

        # Radial frequency falloff — real images follow power law decay,
        # AI images deviate from it. Every pixel is binned by its integer
        # distance from the centre; two bincount passes give the per-ring
        # mean in O(pixels) instead of one boolean mask per radius.
        y_idx, x_idx = np.ogrid[:h, :w]
        radius = np.sqrt((y_idx - cy) ** 2 + (x_idx - cx) ** 2).astype(int)
        flat_radius = radius.ravel()
        ring_sums = np.bincount(flat_radius, weights=magnitude.ravel())
        ring_counts = np.bincount(flat_radius)
        # Rings 1 .. max_r-1 lie fully inside the image, so counts are > 0.
        radial_profile = ring_sums[1:max_r] / ring_counts[1:max_r]

        # Real images: profile decays monotonically
        # AI images: profile has bumps and inconsistencies
        non_monotonic = float((np.diff(radial_profile) > 0).mean())

        # Combine signals into AI score, each clamped to [0, 1]:
        #   higher center_ratio  -> more real
        #   higher hf_ratio      -> more AI
        #   lower mid_std        -> more AI (too smooth)
        #   higher non_monotonic -> more AI (unnatural falloff)
        center_score = min(max(1 - (center_ratio - 3) / 10, 0), 1)
        hf_score = min(max(hf_ratio / 0.8, 0), 1)
        smoothness_score = min(max(1 - mid_std, 0), 1)
        falloff_score = min(max(non_monotonic * 2, 0), 1)
        ai_score = round(
            0.25 * center_score
            + 0.30 * hf_score
            + 0.25 * smoothness_score
            + 0.20 * falloff_score,
            4,
        )
        return {
            "model": "FFT Analysis",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": ai_score,
            "center_ratio": round(center_ratio, 3),
            "hf_ratio": round(hf_ratio, 3),
            "mid_std": round(mid_std, 3),
            "non_monotonic": round(non_monotonic, 3),
        }
    except Exception as e:
        print(f"FFT error: {e}")
        return None
def noise_analysis(image: Image.Image) -> dict | None:
    """
    Sensor Noise Analysis (replaces the earlier EXIF check).

    Real camera sensors produce characteristic random noise patterns
    (photon shot noise + read noise). This noise follows specific
    statistical distributions and is spatially random.
    AI generated images are mathematically smooth — they lack this
    natural noise signature entirely, or have unnatural periodic noise
    from the generation process.
    This is more reliable than EXIF because:
    - EXIF is stripped by social media platforms
    - Noise is physically embedded in the pixel values
    - Cannot be removed without degrading the image

    The noise residual is the RGB image minus a 5x5 Gaussian-blurred copy.
    Three signals are blended into the score:
      - smoothness:  low residual std                      (weight 0.40)
      - autocorr:    correlation between vertically
                     adjacent rows of the residual         (weight 0.35)
      - uniformity:  low spread of per-channel residual std (weight 0.25)

    Returns:
        None when any step raises; otherwise a dict with "model", "label",
        "ai_score" (4 decimals), "noise_std", "autocorr" and "channel_var"
        (3 decimals each).
    """
    try:
        img_array = np.array(image.convert("RGB"), dtype=np.float32)

        # Extract noise by subtracting a smoothed version
        smoothed = cv2.GaussianBlur(img_array, (5, 5), 0)
        noise = img_array - smoothed
        noise_std = float(noise.std())

        # Noise should be spatially random — check lag-1 row autocorrelation
        noise_gray = noise.mean(axis=2)
        upper_rows = noise_gray[:-1].ravel()
        lower_rows = noise_gray[1:].ravel()
        if upper_rows.size < 2 or upper_rows.std() == 0 or lower_rows.std() == 0:
            # Correlation is undefined for single-row or perfectly flat
            # residuals; treat it as "no correlation" without going through
            # corrcoef's NaN and runtime warnings.
            autocorr = 0.0
        else:
            autocorr = float(np.corrcoef(upper_rows, lower_rows)[0, 1])
            if np.isnan(autocorr):
                autocorr = 0.0

        # Real images: noise_std typically 3-15, autocorr near 0
        # AI images: noise_std typically <2 (too smooth) or >20 (unnatural)
        # AI images: autocorr often higher (periodic noise patterns)
        # Too smooth -> likely AI
        smoothness_ai = min(max(1 - (noise_std / 8), 0), 1)
        # High autocorrelation -> likely AI (periodic patterns)
        autocorr_ai = min(max(abs(autocorr) * 2, 0), 1)

        # Noise uniformity across channels — real cameras have
        # channel-specific noise, so near-identical channel stds look AI.
        channel_stds = [noise[:, :, c].std() for c in range(3)]
        channel_var = float(np.std(channel_stds) / (np.mean(channel_stds) + 1e-8))
        uniformity_ai = min(max(1 - channel_var * 3, 0), 1)

        ai_score = round(
            0.40 * smoothness_ai
            + 0.35 * autocorr_ai
            + 0.25 * uniformity_ai,
            4,
        )
        return {
            "model": "Noise Analysis",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": ai_score,
            "noise_std": round(noise_std, 3),
            "autocorr": round(autocorr, 3),
            "channel_var": round(channel_var, 3),
        }
    except Exception as e:
        print(f"Noise analysis error: {e}")
        return None
# ============================================================
# ENSEMBLE COMBINER
# ============================================================
def predict_image_combined(image: Image.Image) -> dict:
    """
    Principled ensemble detection strategy:
    1. Run the primary deep learning models (dima806, umm-maybe); run the
       NYUAD fallback only when neither of them produced a result
    2. Run physics-based analysis (FFT + Noise)
    3. Combine with weighted averaging:
       - Deep learning models: 70% total weight
       - Physics analysis: 30% total weight
       (weights are renormalised when one group is missing)
    4. If at least 75% of the individual detectors fall on the same side of
       0.5 -> models agree; otherwise the result is flagged as uncertain
       and confidence is scaled by 0.7
    A warning is attached to uncertain predictions —
    honest uncertainty is better than wrong certainty.

    Returns:
        dict with "label" ("AI-generated" / "Real" / "Unknown"),
        "confidence", "ai_score", "models_used", "models_agree", "warning"
        (str or None) and "breakdown" (per-detector result dicts).
    """
    results = {}
    dl_scores = []
    physics_scores = []

    # ── Deep Learning Models ─────────────────────────────────
    for key, predictor in (("dima806", predict_dima), ("umm_maybe", predict_umm)):
        outcome = predictor(image)
        if outcome:
            dl_scores.append(outcome["ai_score"])
            results[key] = outcome

    if not dl_scores:
        # NYUAD is only a fallback, so skip its inference cost whenever a
        # primary model already answered.
        nyuad_result = predict_nyuad(image)
        if nyuad_result:
            dl_scores.append(nyuad_result["ai_score"])
            results["nyuad"] = nyuad_result

    # ── Physics Analysis ──────────────────────────────────────
    for key, analyser in (("fft", fft_analysis), ("noise", noise_analysis)):
        outcome = analyser(image)
        if outcome:
            physics_scores.append(outcome["ai_score"])
            results[key] = outcome

    # ── Handle no models available ────────────────────────────
    if not dl_scores and not physics_scores:
        # Same keys as the normal return so callers can index uniformly.
        return {
            "label": "Unknown",
            "confidence": 0.0,
            "ai_score": 0.5,
            "models_used": [],
            "models_agree": False,
            "warning": "No models available",
            "breakdown": results,
        }

    # ── Weighted combination ──────────────────────────────────
    scores = []
    weights = []
    if dl_scores:
        scores.append(sum(dl_scores) / len(dl_scores))
        weights.append(0.70)
    if physics_scores:
        scores.append(sum(physics_scores) / len(physics_scores))
        weights.append(0.30)
    total_weight = sum(weights)
    final_score = round(sum(s * w / total_weight for s, w in zip(scores, weights)), 4)

    # ── Agreement check ───────────────────────────────────────
    # all_scores is non-empty here: the empty case returned above.
    all_scores = dl_scores + physics_scores
    ai_votes = sum(1 for s in all_scores if s >= 0.5)
    agreement = ai_votes / len(all_scores)
    models_agree = agreement >= 0.75 or agreement <= 0.25

    # ── Confidence calculation ────────────────────────────────
    raw_confidence = final_score if final_score >= 0.5 else 1 - final_score
    # Penalize confidence when models disagree
    adjusted_confidence = raw_confidence * (1.0 if models_agree else 0.7)

    # ── Warning for uncertain predictions ────────────────────
    warning = None
    if not models_agree:
        warning = "Models disagree — result may be unreliable. Newer AI generators (Midjourney v6, DALL-E 3, Flux) are harder to detect."
    elif adjusted_confidence < 0.65:
        warning = "Low confidence prediction. Treat this result with caution."

    return {
        "label": "AI-generated" if final_score >= 0.5 else "Real",
        "confidence": round(float(adjusted_confidence), 4),
        "ai_score": final_score,
        "models_used": list(results.keys()),
        "models_agree": models_agree,
        "warning": warning,
        "breakdown": results,
    }
# ============================================================
# EVALUATION — run on a folder of labeled images
# ============================================================
def evaluate_dataset(real_folder: str, ai_folder: str, max_images: int = 50) -> dict:
    """
    Evaluate the ensemble on a local dataset.

    Args:
        real_folder: path to folder of real images
        ai_folder: path to folder of AI generated images
        max_images: max images per class to evaluate (counted after
            filtering by extension; files are taken in sorted name order
            so repeated runs see the same subset)

    Returns:
        dict with "accuracy", weighted "f1" / "precision" / "recall",
        "auc" (None if it could not be computed), "confusion_matrix"
        (2x2 list, rows = actual Real/AI), "errors" (misclassified files)
        and "per_model" (number of images each detector scored).

    Raises:
        ValueError: if neither folder yields a readable image.
    """
    from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

    print("\nEvaluating on dataset...")
    print(f"Real folder : {real_folder}")
    print(f"AI folder : {ai_folder}")

    def load_images(folder, label, max_n):
        # Load up to max_n images from folder as (RGB image, label, filename).
        exts = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}
        items = []
        for fname in sorted(os.listdir(folder)):
            if len(items) >= max_n:
                break
            if os.path.splitext(fname)[1].lower() not in exts:
                continue
            try:
                # convert() returns an independent in-memory copy, so the
                # file handle can be closed right away.
                with Image.open(os.path.join(folder, fname)) as handle:
                    items.append((handle.convert("RGB"), label, fname))
            except Exception as exc:
                print(f" Skipping unreadable image {fname}: {exc}")
        return items

    real_images = load_images(real_folder, 0, max_images)
    ai_images = load_images(ai_folder, 1, max_images)
    all_images = real_images + ai_images
    print(f"Real images : {len(real_images)}")
    print(f"AI images : {len(ai_images)}")
    print(f"Total : {len(all_images)}\n")
    if not all_images:
        raise ValueError("No readable images found in the given folders")

    y_true, y_pred, y_scores = [], [], []
    per_model_preds = {
        "dima806": [], "umm_maybe": [], "nyuad": [],
        "fft": [], "noise": [],
    }
    errors = []
    for count, (img, label, fname) in enumerate(all_images, start=1):
        result = predict_image_combined(img)
        pred = 1 if result["label"] == "AI-generated" else 0
        y_true.append(label)
        y_pred.append(pred)
        y_scores.append(result["ai_score"])

        # Per model predictions: (true label, thresholded prediction, score)
        breakdown = result["breakdown"]
        for model_key, model_preds in per_model_preds.items():
            model_result = breakdown.get(model_key)
            if model_result:
                score = model_result["ai_score"]
                model_preds.append((label, 1 if score >= 0.5 else 0, score))

        if pred != label:
            errors.append({
                "file": fname,
                "actual": "AI" if label == 1 else "Real",
                "predicted": result["label"],
                "score": result["ai_score"],
                "warning": result.get("warning"),
            })
        if count % 10 == 0:
            print(f" Processed {count}/{len(all_images)}...")

    # ── Overall metrics ───────────────────────────────────────
    # Pin both classes so the report and the matrix keep their shape even
    # when only one class occurs in the data or in the predictions.
    class_ids = [0, 1]
    class_names = ["Real", "AI"]
    report = classification_report(
        y_true, y_pred, labels=class_ids, target_names=class_names, output_dict=True
    )
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    accuracy = sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)
    try:
        auc = roc_auc_score(y_true, y_scores)
    except Exception:
        # Undefined e.g. when only one class is present in y_true.
        auc = None

    print("\n" + "=" * 50)
    print("EVALUATION RESULTS")
    print("=" * 50)
    print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))
    print(f"Confusion Matrix:\n{cm}")
    if auc is not None:
        print(f"ROC-AUC: {auc:.4f}")

    # ── Per model breakdown ───────────────────────────────────
    print("\nPer-model breakdown:")
    for model_name, preds in per_model_preds.items():
        if preds:
            hits = sum(truth == guess for truth, guess, _ in preds)
            acc = hits / len(preds)
            print(f" {model_name:<15} accuracy: {acc*100:.1f}% ({len(preds)} images)")

    # ── Error analysis ────────────────────────────────────────
    print(f"\nErrors ({len(errors)} total):")
    for err in errors[:10]:
        print(f" [{err['actual']} → {err['predicted']}] {err['file']} (score={err['score']})")
        if err["warning"]:
            print(f" ⚠ {err['warning']}")

    return {
        "accuracy": accuracy,
        "f1": report["weighted avg"]["f1-score"],
        "precision": report["weighted avg"]["precision"],
        "recall": report["weighted avg"]["recall"],
        "auc": auc,
        "confusion_matrix": cm.tolist(),
        "errors": errors,
        "per_model": {k: len(v) for k, v in per_model_preds.items() if v},
    }
# ============================================================
# UTILITY — load image from URL
# ============================================================
def load_image_from_url(url: str) -> Image.Image:
    """
    Download `url` and return its contents as an RGB PIL image.

    The request is sent with a browser-like User-Agent header and a
    10 second timeout; raise_for_status() is called on the response before
    the body is decoded.
    """
    response = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    response.raise_for_status()
    payload = BytesIO(response.content)
    return Image.open(payload).convert("RGB")
# ============================================================
# QUICK TEST
# ============================================================
if __name__ == "__main__":
    # Smoke test: report which detectors loaded and show how to run an
    # evaluation. No image is analysed here.
    print("Image detector ready.")
    print("\nAvailable models:")
    print(f" dima806 : {'✓' if DIMA_AVAILABLE else '✗'}")
    print(f" umm-maybe: {'✓' if UMM_AVAILABLE else '✗'}")
    print(f" NYUAD : {'✓' if NYUAD_AVAILABLE else '✗'}")
    print(" FFT : ✓ (always available)")
    print(" Noise : ✓ (always available)")
    print("\nTo evaluate on your own images:")
    print(" from image_detector import evaluate_dataset")
    print(" evaluate_dataset('path/to/real/', 'path/to/ai/', max_images=50)")