Spaces:
Sleeping
Sleeping
| # ============================================================ | |
| # AI Image Detection Module | |
| # Models: dima806 (primary) + umm-maybe (secondary) + NYUAD (fallback) | |
| # Physics: FFT frequency analysis + Noise analysis | |
| # ============================================================ | |
| # | |
| # SETUP: | |
| # pip install torch torchvision transformers pillow numpy requests | |
| # pip install beautifulsoup4 opencv-python-headless scikit-learn | |
| # | |
| # USAGE: | |
| # from image_detector import predict_image, evaluate_dataset | |
| # ============================================================ | |
| import os | |
| import numpy as np | |
| import torch | |
| import requests | |
| import cv2 | |
| from PIL import Image | |
| from io import BytesIO | |
| from transformers import ( | |
| AutoModelForImageClassification, | |
| ViTImageProcessor, | |
| pipeline | |
| ) | |
| # ============================================================ | |
| # MODEL LOADING | |
| # ============================================================ | |
# Model loading runs once, at import time. Every model is optional: a load
# failure is reported on stdout and recorded in the matching *_AVAILABLE
# flag, which the predictor functions check before touching the model.
print("Loading image detection models...")

# transformers pipelines take a CUDA device index, or -1 to run on CPU.
_PIPELINE_DEVICE = 0 if torch.cuda.is_available() else -1

# Model 1: dima806 - primary, strong on general AI images
try:
    dima_pipe = pipeline(
        "image-classification",
        model="dima806/ai_vs_real_image_detection",
        device=_PIPELINE_DEVICE,
    )
    DIMA_AVAILABLE = True
    print("✓ dima806 loaded")
except Exception as e:
    # Best-effort: the module must stay importable without this model.
    print(f"✗ dima806 not available: {e}")
    DIMA_AVAILABLE = False

# Model 2: umm-maybe - strong on Midjourney/SDXL
try:
    umm_pipe = pipeline(
        "image-classification",
        model="umm-maybe/AI-image-detector",
        device=_PIPELINE_DEVICE,
    )
    UMM_AVAILABLE = True
    print("✓ umm-maybe loaded")
except Exception as e:
    print(f"✗ umm-maybe not available: {e}")
    UMM_AVAILABLE = False

# Model 3: NYUAD - fallback, trained on DALL-E + SD.
# Loaded only from the "nyuad_model" directory next to this file
# (local_files_only=True, so nothing is downloaded).
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
NYUAD_PATH = os.path.join(BASE_DIR, "nyuad_model")
try:
    nyuad_processor = ViTImageProcessor.from_pretrained(NYUAD_PATH, local_files_only=True)
    # NOTE(review): trust_remote_code=True lets the checkpoint directory run
    # its own Python code at load time - only use a directory you trust.
    nyuad_model = AutoModelForImageClassification.from_pretrained(
        NYUAD_PATH,
        trust_remote_code=True,
        local_files_only=True,
    )
    nyuad_model.eval()  # inference mode
    NYUAD_AVAILABLE = True
    print("✓ NYUAD loaded")
except Exception as e:
    print(f"✗ NYUAD not available: {e}")
    NYUAD_AVAILABLE = False

print("Models ready.\n")
| # ============================================================ | |
| # INDIVIDUAL MODEL PREDICTORS | |
| # ============================================================ | |
def predict_dima(image: Image.Image) -> dict | None:
    """
    Score an image with the dima806 pipeline (primary model).

    Author's note: best for general AI images, news photos, portraits.

    Returns a dict with "model", "label" ("AI-generated" / "Real") and
    "ai_score" (rounded to 4 decimals), or None when the model was not
    loaded or inference raised.
    """
    if not DIMA_AVAILABLE:
        return None

    fake_names = ("FAKE", "AI", "ARTIFICIAL")
    real_names = ("REAL", "HUMAN")
    try:
        predictions = dima_pipe(image.convert("RGB"))

        # Prefer the first class whose label explicitly means "AI".
        ai_score = None
        for entry in predictions:
            if entry["label"].upper() in fake_names:
                ai_score = entry["score"]
                break

        if ai_score is None:
            # No "AI" class reported: invert the first "real" class score,
            # or stay neutral (0.5) when no known label is present at all.
            real_score = 0.5
            for entry in predictions:
                if entry["label"].upper() in real_names:
                    real_score = entry["score"]
                    break
            ai_score = 1 - real_score

        verdict = "AI-generated" if ai_score >= 0.5 else "Real"
        return {
            "model": "dima806",
            "label": verdict,
            "ai_score": round(float(ai_score), 4),
        }
    except Exception as err:
        print(f"dima806 error: {err}")
        return None
def predict_umm(image: Image.Image) -> dict | None:
    """
    Score an image with the umm-maybe pipeline (secondary model).

    Author's note: best for Midjourney, SDXL, newer diffusion models.

    Returns a dict with "model", "label" ("AI-generated" / "Real") and
    "ai_score" (rounded to 4 decimals), or None when the model was not
    loaded or inference raised.
    """
    if not UMM_AVAILABLE:
        return None

    fake_names = ("FAKE", "AI", "ARTIFICIAL", "GENERATED")
    real_names = ("REAL", "HUMAN")
    try:
        predictions = umm_pipe(image.convert("RGB"))

        # Prefer the first class whose label explicitly means "AI".
        ai_score = None
        for entry in predictions:
            if entry["label"].upper() in fake_names:
                ai_score = entry["score"]
                break

        if ai_score is None:
            # No "AI" class reported: invert the first "real" class score,
            # or stay neutral (0.5) when no known label is present at all.
            real_score = 0.5
            for entry in predictions:
                if entry["label"].upper() in real_names:
                    real_score = entry["score"]
                    break
            ai_score = 1 - real_score

        verdict = "AI-generated" if ai_score >= 0.5 else "Real"
        return {
            "model": "umm-maybe",
            "label": verdict,
            "ai_score": round(float(ai_score), 4),
        }
    except Exception as err:
        print(f"umm-maybe error: {err}")
        return None
def predict_nyuad(image: Image.Image) -> dict | None:
    """
    Score an image with the local NYUAD ViT classifier (fallback model).

    Author's note: best for DALL-E, Stable Diffusion 1.x/2.x.

    The AI score is ``1 - P(real)``. The "real" class is matched
    case-insensitively, in line with the other predictors, so a checkpoint
    whose label is spelled "Real" or "REAL" is handled too. If the model
    exposes no "real" class at all, P(real) is taken as 0 and the score is 1.0.

    Returns a dict with "model", "label", "ai_score" and "scores" (the
    per-class probabilities, rounded to 4 decimals), or None when the model
    was not loaded or inference raised.
    """
    if not NYUAD_AVAILABLE:
        return None
    try:
        rgb = image.convert("RGB")
        inputs = nyuad_processor(images=rgb, return_tensors="pt")
        with torch.no_grad():
            outputs = nyuad_model(**inputs)
        # A single image goes in, so squeeze() leaves one probability per class.
        probs = torch.softmax(outputs.logits, dim=-1).squeeze().tolist()

        id2label = nyuad_model.config.id2label
        scores = {id2label[i]: round(p, 4) for i, p in enumerate(probs)}
        prediction = max(scores, key=scores.get)

        real_prob = next(
            (p for name, p in scores.items() if str(name).lower() == "real"),
            0,
        )
        ai_score = round(1 - real_prob, 4)

        # The label follows the top class, so with more than two classes it can
        # read "Real" while ai_score is above 0.5.
        is_real = str(prediction).lower() == "real"
        return {
            "model": "NYUAD",
            "label": "Real" if is_real else "AI-generated",
            "ai_score": ai_score,
            "scores": scores,
        }
    except Exception as e:
        print(f"NYUAD error: {e}")
        return None
| # ============================================================ | |
| # PHYSICS-BASED ANALYSIS | |
| # ============================================================ | |
def fft_analysis(image: Image.Image) -> dict | None:
    """
    Heuristic AI-likelihood score from the 2-D FFT of the grayscale image.

    The heuristic assumes real photographs show a smooth high-frequency
    falloff while generated images deviate from it. Four features are taken
    from the log-magnitude spectrum (DC term shifted to the centre), each
    mapped to [0, 1] with fixed constants and combined by fixed weights:

    - center_ratio  (weight 0.25): DC magnitude / mean magnitude;
      higher reads as more "real".
    - hf_ratio      (weight 0.30): mean of the four corner blocks (highest
      frequencies) / mean magnitude; higher reads as more "AI".
    - mid_std       (weight 0.25): std of the central half of the spectrum /
      overall std; lower reads as more "AI".
    - non_monotonic (weight 0.20): fraction of radius steps at which the
      radially averaged magnitude increases; higher reads as more "AI".

    Returns a dict with "model", "label", "ai_score" and the four raw
    features, or None if the analysis raised.
    """
    try:
        gray = np.array(image.convert("L"), dtype=np.float32)
        spectrum = np.fft.fftshift(np.fft.fft2(gray))
        magnitude = np.log(np.abs(spectrum) + 1)
        h, w = magnitude.shape
        mean_mag = magnitude.mean()

        # DC dominance relative to the average spectral magnitude.
        center_ratio = float(magnitude[h // 2, w // 2] / (mean_mag + 1e-8))

        # After fftshift the corners of the array hold the highest frequencies.
        corners = np.concatenate([
            magnitude[:h // 8, :w // 8].flatten(),
            magnitude[:h // 8, -w // 8:].flatten(),
            magnitude[-h // 8:, :w // 8].flatten(),
            magnitude[-h // 8:, -w // 8:].flatten(),
        ])
        hf_ratio = float(corners.mean() / (mean_mag + 1e-8))

        # Central half of the shifted spectrum. This block contains the DC
        # term and the low/mid frequencies, not a true mid-frequency ring.
        central = magnitude[h // 4:3 * h // 4, w // 4:3 * w // 4]
        mid_std = float(central.std() / (magnitude.std() + 1e-8))

        # Radially averaged magnitude for integer radii 1 .. max_r - 1.
        # bincount gives the sum and pixel count of every ring in one pass,
        # instead of building a full-size boolean mask per radius.
        cy, cx = h // 2, w // 2
        y_idx, x_idx = np.ogrid[:h, :w]
        radius = np.sqrt((y_idx - cy) ** 2 + (x_idx - cx) ** 2).astype(int)
        max_r = min(cy, cx)
        flat_radius = radius.ravel()
        ring_sums = np.bincount(flat_radius, weights=magnitude.ravel())
        ring_counts = np.bincount(flat_radius)
        radial_profile = ring_sums[1:max_r] / ring_counts[1:max_r]

        # Fraction of steps where the profile increases. Very small images
        # yield fewer than two rings; treat that as "no bumps" rather than
        # letting the mean of an empty array turn the score into NaN.
        diffs = np.diff(radial_profile)
        non_monotonic = float((diffs > 0).mean()) if diffs.size else 0.0

        # Map each feature to a [0, 1] "AI-ness" value.
        center_score = min(max(1 - (center_ratio - 3) / 10, 0), 1)
        hf_score = min(max(hf_ratio / 0.8, 0), 1)
        smoothness_score = min(max(1 - mid_std, 0), 1)
        falloff_score = min(max(non_monotonic * 2, 0), 1)

        ai_score = round(
            0.25 * center_score
            + 0.30 * hf_score
            + 0.25 * smoothness_score
            + 0.20 * falloff_score,
            4,
        )
        return {
            "model": "FFT Analysis",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": ai_score,
            "center_ratio": round(center_ratio, 3),
            "hf_ratio": round(hf_ratio, 3),
            "mid_std": round(mid_std, 3),
            "non_monotonic": round(non_monotonic, 3),
        }
    except Exception as e:
        print(f"FFT error: {e}")
        return None
def noise_analysis(image: Image.Image) -> dict | None:
    """
    Heuristic AI-likelihood score from the image's high-frequency residual.

    The residual ("noise") is the RGB image minus a 5x5 Gaussian-blurred copy.
    The heuristic assumes camera sensor noise is present, spatially random and
    differs between colour channels, while generated images are smoother.
    Three terms, each clamped to [0, 1], are combined with fixed weights:

    - smoothness_ai (weight 0.40): 1 - noise_std / 8. Only low noise is
      penalised; any noise_std of 8 or more contributes 0.
    - autocorr_ai   (weight 0.35): 2 * |correlation| between each residual
      row and the row directly below it.
    - uniformity_ai (weight 0.25): 1 - 3 * (std / mean of the per-channel
      residual stds); near-identical channels score high.

    Returns a dict with "model", "label", "ai_score", "noise_std", "autocorr"
    and "channel_var", or None if the analysis raised.
    """
    try:
        img_array = np.array(image.convert("RGB"), dtype=np.float32)

        # Residual = image minus its smoothed version (sigma=0 lets OpenCV
        # derive the sigma from the kernel size).
        smoothed = cv2.GaussianBlur(img_array, (5, 5), 0)
        noise = img_array - smoothed
        noise_std = float(noise.std())

        # Lag-1 vertical correlation of the channel-averaged residual.
        noise_gray = noise.mean(axis=2)
        autocorr = np.corrcoef(noise_gray[:-1].flatten(), noise_gray[1:].flatten())[0, 1]
        # corrcoef yields NaN for constant input; treat that as uncorrelated.
        autocorr = float(autocorr) if not np.isnan(autocorr) else 0.0

        smoothness_ai = min(max(1 - (noise_std / 8), 0), 1)
        autocorr_ai = min(max(abs(autocorr) * 2, 0), 1)

        # Relative spread of the residual std across the R, G, B channels.
        channel_stds = [noise[:, :, c].std() for c in range(3)]
        channel_var = float(np.std(channel_stds) / (np.mean(channel_stds) + 1e-8))
        uniformity_ai = min(max(1 - channel_var * 3, 0), 1)

        ai_score = round(
            0.40 * smoothness_ai
            + 0.35 * autocorr_ai
            + 0.25 * uniformity_ai,
            4,
        )
        return {
            "model": "Noise Analysis",
            "label": "AI-generated" if ai_score >= 0.5 else "Real",
            "ai_score": ai_score,
            "noise_std": round(noise_std, 3),
            "autocorr": round(autocorr, 3),
            "channel_var": round(channel_var, 3),
        }
    except Exception as e:
        print(f"Noise analysis error: {e}")
        return None
| # ============================================================ | |
| # ENSEMBLE COMBINER | |
| # ============================================================ | |
def predict_image_combined(image: Image.Image) -> dict:
    """
    Combine the deep-learning detectors and the physics heuristics into one verdict.

    Strategy:
    1. Run dima806 and umm-maybe. NYUAD is run only as a fallback, when
       neither of them produced a result.
    2. Run the physics-based analyses (FFT + noise).
    3. Average each group, then mix the group averages with weight 0.70
       (deep learning) and 0.30 (physics). If one group is missing, the
       other carries the full weight.
    4. Every individual score votes AI (>= 0.5) or Real. The models "agree"
       when at least 75% vote the same way; otherwise confidence is scaled
       by 0.7 and a warning is attached.

    Returns a dict with:
        label         "AI-generated", "Real", or "Unknown" (nothing ran)
        confidence    distance of the final score from the losing side,
                      penalised on disagreement
        ai_score      weighted score (0.5 when nothing ran)
        models_used   keys of the detectors that contributed
        models_agree  bool, see step 4
        warning       human-readable caveat, or None
        breakdown     per-detector result dicts keyed like models_used
    """
    results = {}
    dl_scores = []
    physics_scores = []

    # Deep learning models
    dima_result = predict_dima(image)
    if dima_result:
        dl_scores.append(dima_result["ai_score"])
        results["dima806"] = dima_result

    umm_result = predict_umm(image)
    if umm_result:
        dl_scores.append(umm_result["ai_score"])
        results["umm_maybe"] = umm_result

    if not dl_scores:
        # NYUAD only counts when neither primary model answered, so its
        # inference is skipped entirely in the common case.
        nyuad_result = predict_nyuad(image)
        if nyuad_result:
            dl_scores.append(nyuad_result["ai_score"])
            results["nyuad"] = nyuad_result

    # Physics analysis
    fft_result = fft_analysis(image)
    if fft_result:
        physics_scores.append(fft_result["ai_score"])
        results["fft"] = fft_result

    noise_result = noise_analysis(image)
    if noise_result:
        physics_scores.append(noise_result["ai_score"])
        results["noise"] = noise_result

    # Nothing produced a score: return a neutral result with the same keys
    # as the normal path so callers can index it uniformly.
    if not dl_scores and not physics_scores:
        return {
            "label": "Unknown",
            "confidence": 0.0,
            "ai_score": 0.5,
            "models_used": [],
            "models_agree": False,
            "warning": "No models available",
            "breakdown": results,
        }

    # Weighted combination of the group averages
    scores = []
    weights = []
    if dl_scores:
        scores.append(sum(dl_scores) / len(dl_scores))
        weights.append(0.70)
    if physics_scores:
        scores.append(sum(physics_scores) / len(physics_scores))
        weights.append(0.30)
    total_weight = sum(weights)
    final_score = round(sum(s * w / total_weight for s, w in zip(scores, weights)), 4)

    # Agreement check: fraction of individual detectors voting "AI"
    all_scores = dl_scores + physics_scores
    all_labels = [1 if s >= 0.5 else 0 for s in all_scores]
    agreement = sum(all_labels) / len(all_labels) if all_labels else 0.5
    models_agree = agreement >= 0.75 or agreement <= 0.25

    # Confidence: certainty of the winning side, scaled to 70% on disagreement
    raw_confidence = final_score if final_score >= 0.5 else 1 - final_score
    adjusted_confidence = raw_confidence * (0.7 + 0.3 * (1 if models_agree else 0))

    # Warning for uncertain predictions
    warning = None
    if not models_agree:
        warning = "Models disagree — result may be unreliable. Newer AI generators (Midjourney v6, DALL-E 3, Flux) are harder to detect."
    elif adjusted_confidence < 0.65:
        warning = "Low confidence prediction. Treat this result with caution."

    return {
        "label": "AI-generated" if final_score >= 0.5 else "Real",
        "confidence": round(float(adjusted_confidence), 4),
        "ai_score": final_score,
        "models_used": list(results.keys()),
        "models_agree": models_agree,
        "warning": warning,
        "breakdown": results,
    }
| # ============================================================ | |
| # EVALUATION — run on a folder of labeled images | |
| # ============================================================ | |
def evaluate_dataset(real_folder: str, ai_folder: str, max_images: int = 50) -> dict:
    """
    Evaluate the ensemble on a local dataset and print a report.

    Args:
        real_folder: path to a folder of real images (ground truth 0)
        ai_folder: path to a folder of AI generated images (ground truth 1)
        max_images: max images per class to evaluate. The cap applies to
            successfully loaded image files, taken in sorted filename
            order, so other files in the folder do not use up the budget.

    Returns:
        dict with "accuracy", weighted "f1" / "precision" / "recall",
        "auc" (None when undefined, e.g. only one class present),
        "confusion_matrix" (2x2 list, rows/cols ordered Real, AI),
        "errors" (one dict per misclassified file) and "per_model"
        (number of images each detector scored).

    Raises:
        ValueError: if no readable image is found in either folder.
        OSError: if a folder cannot be listed.
    """
    # scikit-learn is imported here so the module works without it
    # unless an evaluation is actually run.
    from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

    print("\nEvaluating on dataset...")
    print(f"Real folder : {real_folder}")
    print(f"AI folder : {ai_folder}")

    image_exts = {".jpg", ".jpeg", ".png", ".webp", ".bmp"}

    def load_images(folder, label, max_n):
        """Load up to max_n RGB images from folder, tagged with label."""
        # Filter by extension before applying the cap; sort so runs are repeatable.
        names = sorted(
            name for name in os.listdir(folder)
            if os.path.splitext(name)[1].lower() in image_exts
        )
        items = []
        for fname in names:
            if len(items) >= max_n:
                break
            try:
                # convert() returns an independent in-memory copy, so the
                # file handle can be closed straight away.
                with Image.open(os.path.join(folder, fname)) as handle:
                    img = handle.convert("RGB")
            except Exception as exc:
                print(f" Skipping unreadable image {fname}: {exc}")
                continue
            items.append((img, label, fname))
        return items

    real_images = load_images(real_folder, 0, max_images)
    ai_images = load_images(ai_folder, 1, max_images)
    all_images = real_images + ai_images
    print(f"Real images : {len(real_images)}")
    print(f"AI images : {len(ai_images)}")
    print(f"Total : {len(all_images)}\n")

    if not all_images:
        raise ValueError("No readable images found in the given folders")

    y_true, y_pred, y_scores = [], [], []
    per_model_preds = {
        "dima806": [], "umm_maybe": [], "nyuad": [],
        "fft": [], "noise": [],
    }
    errors = []

    for i, (img, label, fname) in enumerate(all_images):
        result = predict_image_combined(img)
        pred = 1 if result["label"] == "AI-generated" else 0
        y_true.append(label)
        y_pred.append(pred)
        y_scores.append(result["ai_score"])

        # Per-model predictions (only detectors that ran for this image)
        for model_key, collected in per_model_preds.items():
            model_result = result["breakdown"].get(model_key)
            if model_result:
                score = model_result["ai_score"]
                collected.append((label, 1 if score >= 0.5 else 0, score))

        if pred != label:
            errors.append({
                "file": fname,
                "actual": "AI" if label == 1 else "Real",
                "predicted": result["label"],
                "score": result["ai_score"],
                "warning": result.get("warning"),
            })

        if (i + 1) % 10 == 0:
            print(f" Processed {i+1}/{len(all_images)}...")

    # Overall metrics. Explicit labels keep the report and the matrix
    # two-class even when one class is absent from the data or predictions.
    class_ids = [0, 1]
    class_names = ["Real", "AI"]
    report = classification_report(
        y_true, y_pred,
        labels=class_ids, target_names=class_names,
        output_dict=True, zero_division=0,
    )
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    # Computed directly: the report only carries an "accuracy" entry in
    # some label configurations.
    accuracy = sum(t == p for t, p in zip(y_true, y_pred)) / len(y_true)
    try:
        auc = roc_auc_score(y_true, y_scores)
    except ValueError:
        # Undefined, e.g. when only one class is present in y_true.
        auc = None

    print("\n" + "=" * 50)
    print("EVALUATION RESULTS")
    print("=" * 50)
    print(classification_report(
        y_true, y_pred,
        labels=class_ids, target_names=class_names,
        zero_division=0,
    ))
    print(f"Confusion Matrix:\n{cm}")
    if auc is not None:
        print(f"ROC-AUC: {auc:.4f}")

    # Per-model breakdown
    print("\nPer-model breakdown:")
    for model_name, preds in per_model_preds.items():
        if preds:
            truths, guesses, _ = zip(*preds)
            acc = sum(t == p for t, p in zip(truths, guesses)) / len(truths)
            print(f" {model_name:<15} accuracy: {acc*100:.1f}% ({len(preds)} images)")

    # Error analysis (first 10 only; the full list is returned)
    print(f"\nErrors ({len(errors)} total):")
    for e in errors[:10]:
        print(f" [{e['actual']} → {e['predicted']}] {e['file']} (score={e['score']})")
        if e["warning"]:
            print(f" ⚠ {e['warning']}")

    return {
        "accuracy": accuracy,
        "f1": report["weighted avg"]["f1-score"],
        "precision": report["weighted avg"]["precision"],
        "recall": report["weighted avg"]["recall"],
        "auc": auc,
        "confusion_matrix": cm.tolist(),
        "errors": errors,
        "per_model": {k: len(v) for k, v in per_model_preds.items() if v},
    }
| # ============================================================ | |
| # UTILITY — load image from URL | |
| # ============================================================ | |
def load_image_from_url(url: str) -> Image.Image:
    """
    Download ``url`` and decode the response body as an RGB PIL image.

    The request is sent with a browser-like User-Agent header and a
    10-second timeout. HTTP error statuses raise via ``raise_for_status()``;
    network errors and undecodable image data propagate to the caller.
    """
    response = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0"},
        timeout=10,
    )
    response.raise_for_status()
    payload = BytesIO(response.content)
    decoded = Image.open(payload)
    return decoded.convert("RGB")
| # ============================================================ | |
| # QUICK TEST | |
| # ============================================================ | |
if __name__ == "__main__":
    # Running the module directly prints which detectors loaded at import
    # time, plus a usage hint. Distinct marks are used for loaded / missing.
    print("Image detector ready.")
    print("\nAvailable models:")
    print(f" dima806 : {'✓' if DIMA_AVAILABLE else '✗'}")
    print(f" umm-maybe: {'✓' if UMM_AVAILABLE else '✗'}")
    print(f" NYUAD : {'✓' if NYUAD_AVAILABLE else '✗'}")
    # The physics analyses need no model weights, only numpy and OpenCV.
    print(" FFT : ✓ (always available)")
    print(" Noise : ✓ (always available)")
    print("\nTo evaluate on your own images:")
    print(" from image_detector import evaluate_dataset")
    print(" evaluate_dataset('path/to/real/', 'path/to/ai/', max_images=50)")