# ============================================================
# AI Image Detection Module
# Models: dima806 (primary) + umm-maybe (secondary) + NYUAD (fallback)
# Physics: FFT frequency analysis + Noise analysis
# ============================================================
#
# SETUP:
#   pip install torch torchvision transformers pillow numpy requests
#   pip install beautifulsoup4 opencv-python-headless scikit-learn
#
# USAGE:
#   from image_detector import predict_image, evaluate_dataset
# ============================================================

import os
import numpy as np
import torch
import requests
import cv2
from PIL import Image
from io import BytesIO
from transformers import (
    AutoModelForImageClassification,
    ViTImageProcessor,
    pipeline,
)

# ============================================================
# MODEL LOADING
# ============================================================
# NOTE: models are loaded at import time; each one is optional and its
# *_AVAILABLE flag records whether loading succeeded.

print("Loading image detection models...")

# Hugging Face pipeline device index: first CUDA GPU if present, else CPU (-1).
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

# ── Model 1: dima806 — primary, strong on general AI images ──
try:
    dima_pipe = pipeline(
        "image-classification",
        model="dima806/ai_vs_real_image_detection",
        device=_PIPELINE_DEVICE,
    )
    DIMA_AVAILABLE = True
    print("✓ dima806 loaded")
except Exception as e:
    print(f"✗ dima806 not available: {e}")
    DIMA_AVAILABLE = False

# ── Model 2: umm-maybe — strong on Midjourney/SDXL ───────────
try:
    umm_pipe = pipeline(
        "image-classification",
        model="umm-maybe/AI-image-detector",
        device=_PIPELINE_DEVICE,
    )
    UMM_AVAILABLE = True
    print("✓ umm-maybe loaded")
except Exception as e:
    print(f"✗ umm-maybe not available: {e}")
    UMM_AVAILABLE = False

# ── Model 3: NYUAD — fallback, trained on DALL-E + SD ────────
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
NYUAD_PATH = os.path.join(BASE_DIR, "nyuad_model")
try:
    nyuad_processor = ViTImageProcessor.from_pretrained(
        NYUAD_PATH, local_files_only=True
    )
    # NOTE(security): trust_remote_code=True executes Python code shipped with
    # the checkpoint directory. Only keep this if NYUAD_PATH is fully trusted.
    nyuad_model = AutoModelForImageClassification.from_pretrained(
        NYUAD_PATH, trust_remote_code=True, local_files_only=True
    )
    nyuad_model.eval()
    NYUAD_AVAILABLE = True
    print("✓ NYUAD loaded")
except Exception as e:
    print(f"✗ NYUAD not available: {e}")
    NYUAD_AVAILABLE = False

print("Models ready.\n")

# ============================================================
# INDIVIDUAL MODEL PREDICTORS
# ============================================================

# Upper-cased classifier labels that mean "this image is AI-generated".
_DIMA_AI_LABELS = ("FAKE", "AI", "ARTIFICIAL")
_UMM_AI_LABELS = ("FAKE", "AI", "ARTIFICIAL", "GENERATED")
# Upper-cased classifier labels that mean "this image is a real photo".
_REAL_LABELS = ("REAL", "HUMAN")


def _ai_score_from_pipeline(results, ai_labels) -> float:
    """
    Extract P(AI) from an image-classification pipeline result list.

    Args:
        results: list of {"label": str, "score": float} dicts.
        ai_labels: upper-cased labels that denote the AI class.

    Returns:
        The score of the first AI-labelled entry; otherwise one minus the
        score of the first real-labelled entry; otherwise 0.5 (undecided).
    """
    for entry in results:
        if entry["label"].upper() in ai_labels:
            return float(entry["score"])
    for entry in results:
        if entry["label"].upper() in _REAL_LABELS:
            return 1 - float(entry["score"])
    return 0.5


def predict_dima(image: Image.Image) -> dict | None:
    """
    dima806 — primary model.
    Best for: general AI images, news photos, portraits.

    Returns:
        {"model", "label", "ai_score"} or None when the model is not
        loaded or inference fails (the error is printed).
    """
    if not DIMA_AVAILABLE:
        return None
    try:
        results = dima_pipe(image.convert("RGB"))
        ai_score = _ai_score_from_pipeline(results, _DIMA_AI_LABELS)
        return {
            "model": "dima806",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": round(float(ai_score), 4),
        }
    except Exception as e:
        print(f"dima806 error: {e}")
        return None


def predict_umm(image: Image.Image) -> dict | None:
    """
    umm-maybe — secondary model.
    Best for: Midjourney, SDXL, newer diffusion models.

    Returns:
        {"model", "label", "ai_score"} or None when the model is not
        loaded or inference fails (the error is printed).
    """
    if not UMM_AVAILABLE:
        return None
    try:
        results = umm_pipe(image.convert("RGB"))
        ai_score = _ai_score_from_pipeline(results, _UMM_AI_LABELS)
        return {
            "model": "umm-maybe",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": round(float(ai_score), 4),
        }
    except Exception as e:
        print(f"umm-maybe error: {e}")
        return None


def predict_nyuad(image: Image.Image) -> dict | None:
    """
    NYUAD ViT — fallback model.
    Best for: DALL-E, Stable Diffusion 1.x/2.x.

    Returns:
        {"model", "label", "ai_score", "scores"} where "scores" maps each
        model label to its softmax probability, or None when the model is
        not loaded or inference fails (the error is printed).
    """
    if not NYUAD_AVAILABLE:
        return None
    try:
        image = image.convert("RGB")
        inputs = nyuad_processor(images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = nyuad_model(**inputs)
        # squeeze(0) drops only the batch axis, so the result stays a list
        # even for a single-logit head.
        probs = torch.softmax(outputs.logits, dim=-1).squeeze(0).tolist()
        scores = {
            nyuad_model.config.id2label[i]: round(p, 4)
            for i, p in enumerate(probs)
        }
        prediction = max(scores, key=scores.get)
        # Match the "real" class case-insensitively: an exact "real" lookup
        # silently yields ai_score == 1.0 when the checkpoint spells the
        # label "Real"/"REAL".
        real_prob = next(
            (p for name, p in scores.items()
             if str(name).upper() in _REAL_LABELS),
            0,
        )
        predicted_real = str(prediction).upper() in _REAL_LABELS
        return {
            "model": "NYUAD",
            "label": "Real" if predicted_real else "AI-generated",
            "ai_score": round(1 - real_prob, 4),
            "scores": scores,
        }
    except Exception as e:
        print(f"NYUAD error: {e}")
        return None


# ============================================================
# PHYSICS-BASED ANALYSIS
# ============================================================

def fft_analysis(image: Image.Image) -> dict | None:
    """
    FFT Frequency Analysis.

    Real photographs have a natural frequency falloff due to lens optics
    and sensor physics — high frequencies decay smoothly.

    AI images break this pattern:
    - Diffusion models produce unnatural high-frequency peaks
    - GAN images have characteristic checkerboard artifacts in frequency domain
    - Both tend to be unnaturally smooth in mid-frequencies

    This is generator-agnostic — works on any AI model because it exploits
    the physics of real cameras, not model-specific artifacts.

    Returns:
        {"model", "label", "ai_score", "center_ratio", "hf_ratio",
        "mid_std", "non_monotonic"}, or None if the image is too small or
        the analysis raises (the error is printed).
    """
    try:
        gray = np.array(image.convert("L"), dtype=np.float32)
        if min(gray.shape) < 2:
            # The statistics below are undefined for a 1-pixel-wide image.
            print("FFT error: image too small for frequency analysis")
            return None

        # Log-magnitude spectrum with the DC component shifted to the centre.
        magnitude = np.log(np.abs(np.fft.fftshift(np.fft.fft2(gray))) + 1)
        h, w = magnitude.shape
        cy, cx = h // 2, w // 2
        mean_mag = magnitude.mean()

        # Central peak ratio — real photos have stronger center dominance
        center_ratio = float(magnitude[cy, cx] / (mean_mag + 1e-8))

        # High frequency corners — AI images leak more energy into corners.
        # Patch sizes are computed once as positive ints: writing `-w//8`
        # parses as `(-w)//8`, which floors away from zero and made the
        # right/bottom patches one pixel larger than the left/top ones.
        ph, pw = max(1, h // 8), max(1, w // 8)
        corners = np.concatenate([
            magnitude[:ph, :pw].ravel(),
            magnitude[:ph, -pw:].ravel(),
            magnitude[-ph:, :pw].ravel(),
            magnitude[-ph:, -pw:].ravel(),
        ])
        hf_ratio = float(corners.mean() / (mean_mag + 1e-8))

        # Mid-frequency uniformity — AI images are too smooth here.
        # NOTE: this slice is the central half-size rectangle of the
        # spectrum (it includes the low-frequency centre), not a true ring.
        mid_ring = magnitude[h // 4:3 * h // 4, w // 4:3 * w // 4]
        mid_std = float(mid_ring.std() / (magnitude.std() + 1e-8))

        # Radial frequency falloff — real images follow power law decay,
        # AI images deviate from this natural falloff.
        y_idx, x_idx = np.ogrid[:h, :w]
        radius = np.sqrt((y_idx - cy) ** 2 + (x_idx - cx) ** 2).astype(int)
        max_r = min(cy, cx)
        # Mean magnitude per integer radius in a single pass (bincount)
        # instead of building one full-image boolean mask per radius.
        flat_radius = radius.ravel()
        ring_sum = np.bincount(flat_radius, weights=magnitude.ravel())
        ring_count = np.bincount(flat_radius)
        radial_profile = ring_sum[1:max_r] / ring_count[1:max_r]

        # Real images: profile decays monotonically
        # AI images: profile has bumps and inconsistencies
        diffs = np.diff(radial_profile)
        # Fraction of increasing steps; 0.0 when the image is too small to
        # have two radii (avoids a NaN score from the mean of an empty array).
        non_monotonic = float((diffs > 0).mean()) if diffs.size else 0.0

        # Combine signals into AI score
        # Higher center_ratio → more real
        # Higher hf_ratio → more AI
        # Lower mid_std → more AI (too smooth)
        # Higher non_monotonic → more AI (unnatural falloff)
        center_score = min(max(1 - (center_ratio - 3) / 10, 0), 1)
        hf_score = min(max(hf_ratio / 0.8, 0), 1)
        smoothness_score = min(max(1 - mid_std, 0), 1)
        falloff_score = min(max(non_monotonic * 2, 0), 1)

        ai_score = round(
            0.25 * center_score
            + 0.30 * hf_score
            + 0.25 * smoothness_score
            + 0.20 * falloff_score,
            4,
        )

        return {
            "model": "FFT Analysis",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": ai_score,
            "center_ratio": round(center_ratio, 3),
            "hf_ratio": round(hf_ratio, 3),
            "mid_std": round(mid_std, 3),
            "non_monotonic": round(non_monotonic, 3),
        }
    except Exception as e:
        print(f"FFT error: {e}")
        return None


def noise_analysis(image: Image.Image) -> dict | None:
    """
    Sensor Noise Analysis — NEW, replaces EXIF.

    Real camera sensors produce characteristic random noise patterns
    (photon shot noise + read noise). This noise follows specific
    statistical distributions and is spatially random.

    AI generated images are mathematically smooth — they lack this
    natural noise signature entirely, or have unnatural periodic
    noise from the generation process.

    This is more reliable than EXIF because:
    - EXIF is stripped by social media platforms
    - Noise is physically embedded in the pixel values
    - Cannot be removed without degrading the image

    Returns:
        {"model", "label", "ai_score", "noise_std", "autocorr",
        "channel_var"}, or None if the analysis raises (the error is
        printed).
    """
    try:
        img_array = np.array(image.convert("RGB"), dtype=np.float32)

        # Extract noise by subtracting a smoothed version
        smoothed = cv2.GaussianBlur(img_array, (5, 5), 0)
        noise = img_array - smoothed

        noise_std = float(noise.std())

        # Noise should be spatially random — correlate each row of the
        # channel-averaged residual with the row directly below it.
        noise_gray = noise.mean(axis=2)
        autocorr = np.corrcoef(
            noise_gray[:-1].flatten(), noise_gray[1:].flatten()
        )[0, 1]
        # corrcoef yields NaN for constant or empty input; treat as "no
        # correlation".
        autocorr = float(autocorr) if not np.isnan(autocorr) else 0.0

        # Real images: noise_std typically 3-15, autocorr near 0
        # AI images: noise_std typically <2 (too smooth), autocorr often
        # higher (periodic noise patterns).
        # NOTE: only the "too smooth" side is scored; an unusually large
        # noise_std (>20) does not raise the score.

        # Too smooth → likely AI
        smoothness_ai = min(max(1 - (noise_std / 8), 0), 1)

        # High autocorrelation → likely AI (periodic patterns)
        autocorr_ai = min(max(abs(autocorr) * 2, 0), 1)

        # Noise uniformity across channels — real cameras have
        # channel-specific noise
        channel_stds = [noise[:, :, c].std() for c in range(3)]
        channel_var = float(
            np.std(channel_stds) / (np.mean(channel_stds) + 1e-8)
        )
        uniformity_ai = min(max(1 - channel_var * 3, 0), 1)  # too uniform → AI

        ai_score = round(
            0.40 * smoothness_ai
            + 0.35 * autocorr_ai
            + 0.25 * uniformity_ai,
            4,
        )

        return {
            "model": "Noise Analysis",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": ai_score,
            "noise_std": round(noise_std, 3),
            "autocorr": round(autocorr, 3),
            "channel_var": round(channel_var, 3),
        }
    except Exception as e:
        print(f"Noise analysis error: {e}")
        return None


# ============================================================
# ENSEMBLE COMBINER
# ============================================================

def predict_image_combined(image: Image.Image) -> dict:
    """
    Principled ensemble detection strategy:

    1. Run all available deep learning models
    2. Run physics-based analysis (FFT + Noise)
    3. Combine with confidence-weighted voting:
       - Deep learning models: 70% total weight
       - Physics analysis: 30% total weight
    4. If all models agree → high confidence
       If models disagree → flag as uncertain

    Confidence disclaimer added for uncertain predictions —
    honest uncertainty is better than wrong certainty.

    Returns:
        dict with "label", "confidence", "ai_score", "models_used",
        "models_agree", "warning" and "breakdown" (per-model result dicts
        keyed by "dima806", "umm_maybe", "nyuad", "fft", "noise").
    """
    results = {}

    # ── Deep Learning Models ─────────────────────────────────
    dima_result = predict_dima(image)
    umm_result = predict_umm(image)
    # NYUAD is a pure fallback: its result is only used when neither
    # primary model produced one, so skip the inference otherwise.
    nyuad_result = None
    if not (dima_result or umm_result):
        nyuad_result = predict_nyuad(image)

    # ── Physics Analysis ──────────────────────────────────────
    fft_result = fft_analysis(image)
    noise_result = noise_analysis(image)

    # ── Collect available scores ──────────────────────────────
    dl_scores = []
    physics_scores = []

    for key, outcome in (
        ("dima806", dima_result),
        ("umm_maybe", umm_result),
        ("nyuad", nyuad_result),
    ):
        if outcome:
            dl_scores.append(outcome["ai_score"])
            results[key] = outcome

    for key, outcome in (("fft", fft_result), ("noise", noise_result)):
        if outcome:
            physics_scores.append(outcome["ai_score"])
            results[key] = outcome

    # ── Handle no models available ────────────────────────────
    if not dl_scores and not physics_scores:
        return {
            "label": "Unknown",
            "confidence": 0.0,
            "ai_score": 0.5,
            "models_used": [],
            "models_agree": False,
            "warning": "No models available",
            "breakdown": results,
        }

    # ── Weighted combination ──────────────────────────────────
    # Each family contributes its mean score; weights are renormalised
    # when only one family is available.
    scores = []
    weights = []

    if dl_scores:
        scores.append(sum(dl_scores) / len(dl_scores))
        weights.append(0.70)

    if physics_scores:
        scores.append(sum(physics_scores) / len(physics_scores))
        weights.append(0.30)

    total_weight = sum(weights)
    final_score = round(
        sum(s * w / total_weight for s, w in zip(scores, weights)), 4
    )

    # ── Agreement check ───────────────────────────────────────
    # Fraction of individual detectors voting "AI"; agreement means at
    # least 75% of them vote the same way.
    all_scores = dl_scores + physics_scores
    all_labels = [1 if s >= 0.5 else 0 for s in all_scores]
    agreement = sum(all_labels) / len(all_labels) if all_labels else 0.5
    models_agree = agreement >= 0.75 or agreement <= 0.25

    # ── Confidence calculation ────────────────────────────────
    raw_confidence = final_score if final_score >= 0.5 else 1 - final_score
    # Penalize confidence when models disagree
    adjusted_confidence = raw_confidence * (
        0.7 + 0.3 * (1 if models_agree else 0)
    )

    # ── Warning for uncertain predictions ────────────────────
    warning = None
    if not models_agree:
        warning = "Models disagree — result may be unreliable. Newer AI generators (Midjourney v6, DALL-E 3, Flux) are harder to detect."
    elif adjusted_confidence < 0.65:
        warning = "Low confidence prediction. Treat this result with caution."

    return {
        "label": "AI-generated" if final_score >= 0.5 else "Real",
        "confidence": round(float(adjusted_confidence), 4),
        "ai_score": final_score,
        "models_used": list(results.keys()),
        "models_agree": models_agree,
        "warning": warning,
        "breakdown": results,
    }


# Public name advertised in the USAGE header at the top of this module.
predict_image = predict_image_combined


# ============================================================
# EVALUATION — run on a folder of labeled images
# ============================================================

# File extensions (lower-case) treated as images by evaluate_dataset.
_IMAGE_EXTENSIONS = frozenset({".jpg", ".jpeg", ".png", ".webp", ".bmp"})


def _load_labeled_images(folder: str, label: int, max_n: int) -> list:
    """
    Load up to max_n RGB images from folder as (image, label, filename).

    Files are filtered by extension first and visited in sorted name order,
    so non-image files do not eat into the max_n budget and the selection
    is reproducible. Unreadable files are reported and skipped.
    """
    candidates = sorted(
        name for name in os.listdir(folder)
        if os.path.splitext(name)[1].lower() in _IMAGE_EXTENSIONS
    )
    items = []
    for fname in candidates:
        if len(items) >= max_n:
            break
        try:
            # convert() returns an independent in-memory copy, so the file
            # handle can be closed immediately.
            with Image.open(os.path.join(folder, fname)) as handle:
                img = handle.convert("RGB")
        except Exception as e:
            print(f" Skipping unreadable image {fname}: {e}")
            continue
        items.append((img, label, fname))
    return items


def evaluate_dataset(real_folder: str, ai_folder: str, max_images: int = 50) -> dict:
    """
    Evaluate the ensemble on a local dataset.

    Args:
        real_folder: path to folder of real images
        ai_folder: path to folder of AI generated images
        max_images: max images per class to evaluate

    Returns:
        dict with "accuracy", weighted-average "f1" / "precision" /
        "recall", "auc" (None when it cannot be computed),
        "confusion_matrix" (2x2, rows/cols ordered Real, AI), "errors"
        (misclassified files) and "per_model" (number of images each
        individual detector scored).

    Raises:
        ValueError: if no readable image is found in either folder.
    """
    from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

    print("\nEvaluating on dataset...")
    print(f"Real folder : {real_folder}")
    print(f"AI folder : {ai_folder}")

    real_images = _load_labeled_images(real_folder, 0, max_images)
    ai_images = _load_labeled_images(ai_folder, 1, max_images)
    all_images = real_images + ai_images

    print(f"Real images : {len(real_images)}")
    print(f"AI images : {len(ai_images)}")
    print(f"Total : {len(all_images)}\n")

    if not all_images:
        raise ValueError("No readable images found in the given folders")

    y_true, y_pred, y_scores = [], [], []
    per_model_preds = {
        "dima806": [],
        "umm_maybe": [],
        "nyuad": [],
        "fft": [],
        "noise": [],
    }
    errors = []

    for i, (img, label, fname) in enumerate(all_images):
        result = predict_image_combined(img)
        pred = 1 if result["label"] == "AI-generated" else 0

        y_true.append(label)
        y_pred.append(pred)
        y_scores.append(result["ai_score"])

        # Per model predictions: (true label, predicted label, raw score)
        breakdown = result["breakdown"]
        for model_key, bucket in per_model_preds.items():
            model_result = breakdown.get(model_key)
            if model_result:
                score = model_result["ai_score"]
                bucket.append((label, 1 if score >= 0.5 else 0, score))

        if pred != label:
            errors.append({
                "file": fname,
                "actual": "AI" if label == 1 else "Real",
                "predicted": result["label"],
                "score": result["ai_score"],
                "warning": result.get("warning"),
            })

        if (i + 1) % 10 == 0:
            print(f" Processed {i+1}/{len(all_images)}...")

    # ── Overall metrics ───────────────────────────────────────
    # Passing labels explicitly keeps the report and the confusion matrix
    # two-class even when one class is missing from the data or from the
    # predictions (otherwise target_names no longer matches and sklearn
    # raises).
    class_ids = [0, 1]
    class_names = ["Real", "AI"]
    report = classification_report(
        y_true, y_pred, labels=class_ids, target_names=class_names,
        output_dict=True, zero_division=0,
    )
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    # Computed directly: the report only carries an "accuracy" entry under
    # some label configurations.
    accuracy = sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)

    try:
        auc = roc_auc_score(y_true, y_scores)
    except ValueError:
        # Undefined when only one class is present in y_true.
        auc = None

    print("\n" + "=" * 50)
    print("EVALUATION RESULTS")
    print("=" * 50)
    print(classification_report(
        y_true, y_pred, labels=class_ids, target_names=class_names,
        zero_division=0,
    ))
    print(f"Confusion Matrix:\n{cm}")
    if auc is not None:
        print(f"ROC-AUC: {auc:.4f}")

    # ── Per model breakdown ───────────────────────────────────
    print("\nPer-model breakdown:")
    for model_name, preds in per_model_preds.items():
        if preds:
            mt, mp, _ = zip(*preds)
            acc = sum(t == p for t, p in zip(mt, mp)) / len(mt)
            print(f" {model_name:<15} accuracy: {acc*100:.1f}% ({len(preds)} images)")

    # ── Error analysis ────────────────────────────────────────
    print(f"\nErrors ({len(errors)} total):")
    for e in errors[:10]:
        print(f" [{e['actual']} → {e['predicted']}] {e['file']} (score={e['score']})")
        if e["warning"]:
            print(f" ⚠ {e['warning']}")

    return {
        "accuracy": accuracy,
        "f1": report["weighted avg"]["f1-score"],
        "precision": report["weighted avg"]["precision"],
        "recall": report["weighted avg"]["recall"],
        "auc": auc,
        "confusion_matrix": cm.tolist(),
        "errors": errors,
        "per_model": {k: len(v) for k, v in per_model_preds.items() if v},
    }


# ============================================================
# UTILITY — load image from URL
# ============================================================

def load_image_from_url(url: str) -> Image.Image:
    """
    Download url and return it as an RGB PIL image.

    Raises:
        requests.RequestException: on network failure, timeout (10 s) or a
            non-2xx HTTP status.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    return Image.open(BytesIO(resp.content)).convert("RGB")


# ============================================================
# QUICK TEST
# ============================================================

if __name__ == "__main__":
    print("Image detector ready.")
    print("\nAvailable models:")
    print(f" dima806 : {'✓' if DIMA_AVAILABLE else '✗'}")
    print(f" umm-maybe: {'✓' if UMM_AVAILABLE else '✗'}")
    print(f" NYUAD : {'✓' if NYUAD_AVAILABLE else '✗'}")
    print(" FFT : ✓ (always available)")
    print(" Noise : ✓ (always available)")
    print("\nTo evaluate on your own images:")
    print(" from image_detector import evaluate_dataset")
    print(" evaluate_dataset('path/to/real/', 'path/to/ai/', max_images=50)")