Upload folder using huggingface_hub
- .gitignore +30 -0
- analyze_images.py +155 -0
- download_samples.py +15 -0
- eval_detector.py +133 -0
- eval_forensics.py +112 -0
- evaluate_forensics.py +146 -0
- improved_detector.py +407 -0
- optimized_detector.py +272 -0
- predict.py +134 -0
- requirements.txt +22 -0
- simple_detector.py +101 -0
- src/__init__.py +0 -0
- src/forensics/__init__.py +0 -0
- src/forensics/detector.py +946 -0
- src/fusion/__init__.py +0 -0
- src/fusion/combiner.py +251 -0
- src/neural/__init__.py +3 -0
- src/neural/detector.py +375 -0
- src/vlm/__init__.py +0 -0
- src/vlm/reasoner.py +636 -0
- test_ensemble.py +128 -0
- test_forensics.py +25 -0
- test_pretrained_detectors.py +302 -0
.gitignore
ADDED
@@ -0,0 +1,30 @@
# Python
__pycache__/
*.py[cod]
*.so
.Python
*.egg-info/
dist/
build/

# Data and outputs (don't commit large files)
data/
datasets/
outputs/
*.json
*.zip

# Environment
.env
.venv/
venv/
env/

# IDE
.vscode/
.idea/
*.swp

# OS
.DS_Store
Thumbs.db
analyze_images.py
ADDED
@@ -0,0 +1,155 @@
#!/usr/bin/env python3
"""Analyze specific images to understand real vs fake characteristics."""

import cv2
import numpy as np
from glob import glob
import os

def analyze_image(img_path):
    """Detailed analysis of an image."""
    img = cv2.imread(img_path)
    if img is None:
        return None

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)
    h, w = gray.shape

    results = {'shape': img.shape}

    # 1. Basic stats
    results['mean_brightness'] = np.mean(gray)
    results['std_brightness'] = np.std(gray)

    # 2. FFT analysis - look at specific frequencies
    f_transform = np.fft.fft2(gray)
    f_shift = np.fft.fftshift(f_transform)
    magnitude = np.abs(f_shift)

    center_h, center_w = h // 2, w // 2
    max_radius = min(h, w) // 2

    # Create distance map
    y, x = np.ogrid[:h, :w]
    distance = np.sqrt((y - center_h) ** 2 + (x - center_w) ** 2)

    # Energy in bands
    low_mask = distance < (max_radius * 0.1)
    mid_mask = (distance >= max_radius * 0.1) & (distance < max_radius * 0.4)
    high_mask = (distance >= max_radius * 0.4) & (distance < max_radius * 0.9)

    low_energy = np.mean(magnitude[low_mask])
    mid_energy = np.mean(magnitude[mid_mask])
    high_energy = np.mean(magnitude[high_mask])
    total = low_energy + mid_energy + high_energy

    results['fft_low_ratio'] = low_energy / total
    results['fft_mid_ratio'] = mid_energy / total
    results['fft_high_ratio'] = high_energy / total

    # 3. Noise analysis
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    noise = gray - blurred
    results['noise_std'] = np.std(noise)
    results['noise_mean'] = np.mean(np.abs(noise))

    # Noise uniformity across regions
    region_stds = []
    block_size = h // 4
    for i in range(4):
        for j in range(4):
            block = noise[i*block_size:(i+1)*block_size, j*block_size:(j+1)*block_size]
            region_stds.append(np.std(block))
    results['noise_uniformity'] = np.std(region_stds) / (np.mean(region_stds) + 1e-10)

    # 4. Laplacian variance (sharpness)
    gray_uint8 = gray.astype(np.uint8)
    laplacian = cv2.Laplacian(gray_uint8, cv2.CV_64F)
    results['laplacian_var'] = laplacian.var()

    # 5. Edge density
    edges = cv2.Canny(gray.astype(np.uint8), 50, 150)
    results['edge_density'] = np.mean(edges > 0)

    # 6. Local variance statistics
    kernel_size = 15
    local_mean = cv2.blur(gray, (kernel_size, kernel_size))
    local_sqr_mean = cv2.blur(gray ** 2, (kernel_size, kernel_size))
    local_var = local_sqr_mean - local_mean ** 2

    results['local_var_mean'] = np.mean(local_var)
    results['local_var_std'] = np.std(local_var)
    results['smooth_ratio'] = np.mean(local_var < 50)

    # 7. DCT analysis on 8x8 blocks
    ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    y_channel = ycrcb[:, :, 0].astype(np.float32)
    h8, w8 = (h // 8) * 8, (w // 8) * 8
    y_cropped = y_channel[:h8, :w8]

    dct_stats = []
    for i in range(0, h8, 8):
        for j in range(0, w8, 8):
            block = y_cropped[i:i+8, j:j+8]
            dct = cv2.dct(block)
            # High frequency energy (bottom-right of DCT block)
            hf_energy = np.mean(np.abs(dct[4:, 4:]))
            dct_stats.append(hf_energy)

    results['dct_hf_mean'] = np.mean(dct_stats)
    results['dct_hf_std'] = np.std(dct_stats)

    # 8. Color saturation
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    saturation = hsv[:, :, 1]
    results['sat_mean'] = np.mean(saturation)
    results['sat_std'] = np.std(saturation)

    return results

def main():
    data_dir = "data/ai_generated_v2"
    images = glob(os.path.join(data_dir, "*.png"))

    real_stats = {}
    fake_stats = {}

    for img_path in sorted(images):
        filename = os.path.basename(img_path)
        is_fake = "images_fake_" in filename

        results = analyze_image(img_path)
        if results is None:
            continue

        target = fake_stats if is_fake else real_stats
        for k, v in results.items():
            if k == 'shape':
                continue
            if k not in target:
                target[k] = []
            target[k].append(v)

    print("\n" + "="*70)
    print("DETAILED FEATURE COMPARISON: REAL vs FAKE")
    print("="*70)

    # Sort by absolute difference
    features = []
    for k in real_stats.keys():
        real_mean = np.mean(real_stats[k])
        fake_mean = np.mean(fake_stats[k])
        diff = fake_mean - real_mean
        sep = abs(diff) / (np.std(real_stats[k]) + np.std(fake_stats[k]) + 1e-10)
        features.append((k, real_mean, fake_mean, diff, sep))

    features.sort(key=lambda x: -abs(x[4]))  # Sort by separation

    for k, real_mean, fake_mean, diff, sep in features:
        print(f"\n{k}:")
        print(f"  Real: {real_mean:.4f} ± {np.std(real_stats[k]):.4f}")
        print(f"  Fake: {fake_mean:.4f} ± {np.std(fake_stats[k]):.4f}")
        print(f"  Diff: {diff:+.4f} | Separation: {sep:.3f}")

if __name__ == "__main__":
    main()
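
The local-variance feature above (section 6) relies on the box-filter identity Var(X) = E[X²] − E[X]², computing a per-pixel windowed variance from two blurred images. A minimal sketch (illustrative, not part of this upload) checking that identity against a directly computed window variance:

import cv2
import numpy as np

# Box-filter variance: Var(X) = E[X^2] - E[X]^2 over each k x k window.
patch = np.random.rand(64, 64).astype(np.float32) * 255
k = 15
local_mean = cv2.blur(patch, (k, k))
local_sqr_mean = cv2.blur(patch ** 2, (k, k))
local_var = local_sqr_mean - local_mean ** 2

# For an interior pixel the blurred means are exact window averages,
# so the result matches numpy's variance over the same window.
cy = cx = 32
window = patch[cy - k // 2:cy + k // 2 + 1, cx - k // 2:cx + k // 2 + 1]
assert np.isclose(local_var[cy, cx], window.var(), rtol=1e-3)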
download_samples.py
ADDED
@@ -0,0 +1,15 @@
"""Download sample images for testing."""
import urllib.request
import os

os.makedirs("data/real", exist_ok=True)
os.makedirs("data/manipulated", exist_ok=True)

# Real estate image sources (we'll use the sample we already have)
print("Sample images ready in data/test/")
print("For full testing, add real and AI-generated real estate images to:")
print("  - data/real/")
print("  - data/manipulated/")
print("\nYou can generate fake images using:")
print("  - DALL-E / Midjourney / Flux with 'modern kitchen interior' prompts")
print("  - Virtual staging tools")
eval_detector.py
ADDED
@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""Evaluate forensic detector on test dataset."""

import os
import sys
import numpy as np
from glob import glob

sys.path.insert(0, '/home/omer_aims_ac_za/digital-integrity-challenge')
from src.forensics.detector import ForensicDetector

def evaluate():
    detector = ForensicDetector()
    data_dir = "data/ai_generated_v2"

    images = glob(os.path.join(data_dir, "*.png"))

    real_scores = []
    fake_scores = []
    all_results = []

    for img_path in sorted(images):
        filename = os.path.basename(img_path)
        # Check for images_fake_ vs images_real_ pattern
        is_fake = "images_fake_" in filename

        try:
            results = detector.analyze(img_path)
            score = results["aggregate_score"]

            all_results.append({
                'filename': filename,
                'is_fake': is_fake,
                'score': score,
                'results': results
            })

            if is_fake:
                fake_scores.append(score)
            else:
                real_scores.append(score)

        except Exception as e:
            print(f"Error processing {filename}: {e}")

    print("\n" + "="*60)
    print("SCORE DISTRIBUTION")
    print("="*60)
    print(f"\nReal images (n={len(real_scores)}):")
    print(f"  Mean: {np.mean(real_scores):.3f}")
    print(f"  Std: {np.std(real_scores):.3f}")
    print(f"  Min: {np.min(real_scores):.3f}")
    print(f"  Max: {np.max(real_scores):.3f}")

    print(f"\nFake images (n={len(fake_scores)}):")
    print(f"  Mean: {np.mean(fake_scores):.3f}")
    print(f"  Std: {np.std(fake_scores):.3f}")
    print(f"  Min: {np.min(fake_scores):.3f}")
    print(f"  Max: {np.max(fake_scores):.3f}")

    # Find optimal threshold
    print("\n" + "="*60)
    print("THRESHOLD ANALYSIS")
    print("="*60)

    best_acc = 0
    best_thresh = 0.5

    for thresh in np.arange(0.2, 0.8, 0.01):
        real_correct = sum(1 for s in real_scores if s < thresh)
        fake_correct = sum(1 for s in fake_scores if s >= thresh)
        acc = (real_correct + fake_correct) / (len(real_scores) + len(fake_scores))

        if acc > best_acc:
            best_acc = acc
            best_thresh = thresh

    print(f"\nBest threshold: {best_thresh:.2f}")
    print(f"Best accuracy: {best_acc*100:.1f}%")

    # Per-feature analysis
    print("\n" + "="*60)
    print("PER-FEATURE ANALYSIS (mean fake - mean real)")
    print("="*60)

    feature_names = ['fft_score', 'ela_score', 'noise_score', 'texture_score',
                     'compression_score', 'edge_score', 'sharpness_score',
                     'rich_poor_texture_score', 'color_consistency_score',
                     'lbp_score', 'glcm_score']

    for feat in feature_names:
        real_feat = [r['results'][feat] for r in all_results if not r['is_fake']]
        fake_feat = [r['results'][feat] for r in all_results if r['is_fake']]

        diff = np.mean(fake_feat) - np.mean(real_feat)

        # Calculate feature's individual accuracy
        best_feat_acc = 0
        best_feat_dir = 1
        for thresh in np.arange(0.1, 0.9, 0.02):
            for direction in [1, -1]:
                if direction == 1:
                    real_c = sum(1 for s in real_feat if s < thresh)
                    fake_c = sum(1 for s in fake_feat if s >= thresh)
                else:
                    real_c = sum(1 for s in real_feat if s >= thresh)
                    fake_c = sum(1 for s in fake_feat if s < thresh)
                acc = (real_c + fake_c) / (len(real_feat) + len(fake_feat))
                if acc > best_feat_acc:
                    best_feat_acc = acc
                    best_feat_dir = direction

        dir_str = "(+)" if best_feat_dir == 1 else "(-)"
        print(f"  {feat:28s}: diff={diff:+.3f} acc={best_feat_acc*100:.1f}% {dir_str}")
        print(f"    Real: {np.mean(real_feat):.3f}±{np.std(real_feat):.3f} | Fake: {np.mean(fake_feat):.3f}±{np.std(fake_feat):.3f}")

    # Show misclassified examples
    print("\n" + "="*60)
    print("MISCLASSIFIED EXAMPLES (at threshold 0.5)")
    print("="*60)

    print("\nFalse positives (real classified as fake):")
    for r in sorted(all_results, key=lambda x: -x['score']):
        if not r['is_fake'] and r['score'] >= 0.5:
            print(f"  {r['filename']}: {r['score']:.3f}")

    print("\nFalse negatives (fake classified as real):")
    for r in sorted(all_results, key=lambda x: x['score']):
        if r['is_fake'] and r['score'] < 0.5:
            print(f"  {r['filename']}: {r['score']:.3f}")

if __name__ == "__main__":
    evaluate()
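
The threshold sweep above steps through np.arange(0.2, 0.8, 0.01); accuracy is piecewise constant in the threshold, so scanning only the distinct observed scores (plus one cut above the maximum, meaning "predict everything real") is an equivalent, step-size-free alternative. A sketch of that idea (a hypothetical helper, not part of the upload):

import numpy as np

def best_threshold(real_scores, fake_scores):
    # Accuracy can only change at an observed score value, plus one
    # candidate above the max score (the "all real" prediction).
    scores = np.asarray(real_scores + fake_scores, dtype=float)
    labels = np.asarray([0] * len(real_scores) + [1] * len(fake_scores))
    candidates = np.concatenate([np.unique(scores), [scores.max() + 1e-6]])
    best_t, best_acc = 0.5, 0.0
    for t in candidates:
        acc = np.mean((scores >= t) == labels)
        if acc > best_acc:
            best_t, best_acc = float(t), float(acc)
    return best_t, best_acc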
eval_forensics.py
ADDED
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""Evaluate forensics detector on real vs Flux-generated images."""

import sys
sys.path.insert(0, '.')

from pathlib import Path
import numpy as np
from src.forensics.detector import ForensicDetector

REAL_DIR = Path("data/real")
FAKE_DIR = Path("data/ai_generated_v2")

def evaluate():
    detector = ForensicDetector()

    # Real estate photos (definitely real)
    real_estate_files = sorted(REAL_DIR.glob("*.jpg"))

    # From ai_generated_v2: files with "_fake_" are AI, files with "_real_" are real
    all_v2_files = sorted(FAKE_DIR.glob("*.png"))
    fake_files = [f for f in all_v2_files if "_fake_" in f.name]
    real_v2_files = [f for f in all_v2_files if "_real_" in f.name]

    # Combine all real files
    all_real_files = list(real_estate_files) + list(real_v2_files)

    print(f"Testing {len(all_real_files)} real ({len(real_estate_files)} real_estate + {len(real_v2_files)} v2_real)")
    print(f"Testing {len(fake_files)} fake (AI-generated)\n")

    real_scores = []
    fake_scores = []
    real_details = []
    fake_details = []

    print("=== REAL IMAGES ===")
    for f in all_real_files:
        try:
            result = detector.analyze(str(f))
            score = result['aggregate_score']
            real_scores.append(score)
            real_details.append((f.name, result))
            verdict = "CORRECT" if score < 0.5 else "WRONG"
            print(f"{f.name}: {score:.3f} - {verdict}")
        except Exception as e:
            print(f"{f.name}: ERROR - {e}")

    print("\n=== FAKE (AI-GENERATED) IMAGES ===")
    for f in fake_files:
        try:
            result = detector.analyze(str(f))
            score = result['aggregate_score']
            fake_scores.append(score)
            fake_details.append((f.name, result))
            verdict = "CORRECT" if score >= 0.5 else "WRONG"
            print(f"{f.name}: {score:.3f} - {verdict}")
        except Exception as e:
            print(f"{f.name}: ERROR - {e}")

    # Calculate accuracy
    real_correct = sum(1 for s in real_scores if s < 0.5)
    fake_correct = sum(1 for s in fake_scores if s >= 0.5)

    print("\n" + "="*60)
    print("SUMMARY")
    print("="*60)
    print(f"Real images: {real_correct}/{len(real_scores)} correct ({100*real_correct/len(real_scores):.1f}%)")
    print(f"Fake images: {fake_correct}/{len(fake_scores)} correct ({100*fake_correct/len(fake_scores):.1f}%)")
    total = len(real_scores) + len(fake_scores)
    print(f"Overall: {real_correct + fake_correct}/{total} ({100*(real_correct + fake_correct)/total:.1f}%)")

    print(f"\nReal scores: mean={np.mean(real_scores):.3f}, std={np.std(real_scores):.3f}")
    print(f"Fake scores: mean={np.mean(fake_scores):.3f}, std={np.std(fake_scores):.3f}")
    print(f"Separation: {np.mean(fake_scores) - np.mean(real_scores):.3f}")

    # Analyze which signals discriminate best
    print("\n" + "="*60)
    print("SIGNAL DISCRIMINATION ANALYSIS (d' = Cohen's d)")
    print("="*60)

    signals = ['fft_score', 'ela_score', 'noise_score', 'texture_score',
               'compression_score', 'edge_score', 'sharpness_score',
               'rich_poor_texture_score', 'color_consistency_score',
               'lbp_score', 'glcm_score']

    disc_power = []
    for sig in signals:
        real_vals = [d[1][sig] for d in real_details]
        fake_vals = [d[1][sig] for d in fake_details]

        real_mean = np.mean(real_vals)
        fake_mean = np.mean(fake_vals)
        separation = fake_mean - real_mean

        # Calculate discrimination power (Cohen's d)
        real_std = np.std(real_vals)
        fake_std = np.std(fake_vals)
        pooled_std = np.sqrt((real_std**2 + fake_std**2) / 2)
        d_prime = separation / (pooled_std + 1e-10)
        disc_power.append((sig, d_prime, separation, real_mean, fake_mean))

        print(f"{sig:25s}: real={real_mean:.3f}, fake={fake_mean:.3f}, sep={separation:+.3f}, d'={d_prime:+.2f}")

    # Sort by absolute discrimination power
    disc_power.sort(key=lambda x: abs(x[1]), reverse=True)
    print("\n=== TOP DISCRIMINATORS (by |d'|) ===")
    for sig, dp, sep, rm, fm in disc_power[:5]:
        direction = "HIGHER for fake" if sep > 0 else "LOWER for fake"
        print(f"{sig:25s}: d'={dp:+.2f} ({direction})")

if __name__ == "__main__":
    evaluate()
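
The d' printed above is Cohen's d with a pooled standard deviation: d' = (μ_fake − μ_real) / sqrt((σ_real² + σ_fake²) / 2). As a worked check (illustrative, not part of the upload), plugging in the dct_hf statistics quoted in optimized_detector.py (Real = 1.86 ± 1.70, Fake = 0.89 ± 1.01):

import numpy as np

real_mean, real_std = 1.86, 1.70
fake_mean, fake_std = 0.89, 1.01
pooled_std = np.sqrt((real_std ** 2 + fake_std ** 2) / 2)   # ~ 1.398
d_prime = (fake_mean - real_mean) / pooled_std
print(f"d' = {d_prime:+.2f}")   # -> d' = -0.69: fakes sit ~0.7 pooled SDs below reals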
evaluate_forensics.py
ADDED
@@ -0,0 +1,146 @@
#!/usr/bin/env python3
"""Evaluate forensic detector on test datasets."""

import sys
import os
import glob
import json
import numpy as np

sys.path.insert(0, '.')
from src.forensics.detector import ForensicDetector

def evaluate_dataset(detector, image_paths, label, threshold=0.5):
    """Evaluate detector on a set of images with known label."""
    results = []
    for path in image_paths:
        try:
            result = detector.analyze(path)
            result['path'] = os.path.basename(path)
            result['true_label'] = label
            result['predicted'] = 'fake' if result['aggregate_score'] >= threshold else 'real'
            result['correct'] = (label == 'fake' and result['predicted'] == 'fake') or \
                                (label == 'real' and result['predicted'] == 'real')
            results.append(result)
        except Exception as e:
            print(f"Error processing {path}: {e}")
    return results

def print_analysis(all_results, threshold=0.5):
    """Print detailed analysis of results."""
    fake_results = [r for r in all_results if r['true_label'] == 'fake']
    real_results = [r for r in all_results if r['true_label'] == 'real']

    # Calculate accuracy
    fake_correct = sum(1 for r in fake_results if r['correct'])
    real_correct = sum(1 for r in real_results if r['correct'])

    print(f"\n{'='*60}")
    print(f"OVERALL RESULTS (threshold={threshold})")
    print(f"{'='*60}")
    print(f"FAKE images: {fake_correct}/{len(fake_results)} correct ({100*fake_correct/max(1,len(fake_results)):.1f}%)")
    print(f"REAL images: {real_correct}/{len(real_results)} correct ({100*real_correct/max(1,len(real_results)):.1f}%)")
    print(f"Total accuracy: {(fake_correct+real_correct)}/{len(all_results)} ({100*(fake_correct+real_correct)/max(1,len(all_results)):.1f}%)")

    # Per-feature analysis
    features = [k for k in all_results[0].keys() if k.endswith('_score') and k != 'aggregate_score']

    print(f"\n{'='*60}")
    print("FEATURE DISCRIMINATION ANALYSIS")
    print("(Higher fake_mean - real_mean = better discriminator)")
    print(f"{'='*60}")

    discriminators = []
    for feat in features:
        fake_scores = [r[feat] for r in fake_results]
        real_scores = [r[feat] for r in real_results]
        fake_mean = np.mean(fake_scores)
        real_mean = np.mean(real_scores)
        discrimination = fake_mean - real_mean  # Positive = good (fake scores higher)
        discriminators.append((feat, discrimination, fake_mean, real_mean, np.std(fake_scores), np.std(real_scores)))

    # Sort by discrimination power
    discriminators.sort(key=lambda x: x[1], reverse=True)

    print(f"\n{'Feature':<30} {'Discrim':>8} {'Fake μ':>8} {'Real μ':>8} {'Fake σ':>8} {'Real σ':>8}")
    print("-" * 78)
    for feat, disc, fake_m, real_m, fake_s, real_s in discriminators:
        print(f"{feat:<30} {disc:>+8.3f} {fake_m:>8.3f} {real_m:>8.3f} {fake_s:>8.3f} {real_s:>8.3f}")

    # Aggregate score distribution
    print(f"\n{'='*60}")
    print("AGGREGATE SCORE DISTRIBUTION")
    print(f"{'='*60}")
    fake_agg = [r['aggregate_score'] for r in fake_results]
    real_agg = [r['aggregate_score'] for r in real_results]
    print(f"FAKE: mean={np.mean(fake_agg):.3f}, std={np.std(fake_agg):.3f}, min={np.min(fake_agg):.3f}, max={np.max(fake_agg):.3f}")
    print(f"REAL: mean={np.mean(real_agg):.3f}, std={np.std(real_agg):.3f}, min={np.min(real_agg):.3f}, max={np.max(real_agg):.3f}")

    # Show misclassified examples
    print(f"\n{'='*60}")
    print("MISCLASSIFIED EXAMPLES")
    print(f"{'='*60}")

    missed_fakes = [r for r in fake_results if not r['correct']]
    false_positives = [r for r in real_results if not r['correct']]

    print(f"\nMissed FAKE images (predicted as real): {len(missed_fakes)}")
    for r in missed_fakes[:10]:
        print(f"  {r['path']}: agg={r['aggregate_score']:.3f}")

    print(f"\nFalse positives (real predicted as fake): {len(false_positives)}")
    for r in false_positives[:10]:
        print(f"  {r['path']}: agg={r['aggregate_score']:.3f}")

    return discriminators

def main():
    detector = ForensicDetector()
    all_results = []

    # Collect image paths
    data_dir = '/home/omer_aims_ac_za/digital-integrity-challenge/data'

    # AI generated images (fake)
    fake_paths = []
    fake_paths.extend(glob.glob(f'{data_dir}/ai_generated_v2/*.png'))
    fake_paths.extend(glob.glob(f'{data_dir}/ai_generated/*.png'))
    fake_paths.extend(glob.glob(f'{data_dir}/ai_generated/*.jpg'))
    fake_paths.extend(glob.glob(f'{data_dir}/manipulated/*.jpg'))
    fake_paths.extend(glob.glob(f'{data_dir}/test_subset/manip/*.jpg'))

    # Real images
    real_paths = []
    real_paths.extend(glob.glob(f'{data_dir}/real/*.jpg'))
    real_paths.extend(glob.glob(f'{data_dir}/test_subset/real/*.jpg'))

    print(f"Found {len(fake_paths)} fake images and {len(real_paths)} real images")

    # Run evaluation
    print("\nProcessing fake images...")
    fake_results = evaluate_dataset(detector, fake_paths, 'fake')
    print(f"Processed {len(fake_results)} fake images")

    print("\nProcessing real images...")
    real_results = evaluate_dataset(detector, real_paths, 'real')
    print(f"Processed {len(real_results)} real images")

    all_results = fake_results + real_results

    # Test different thresholds
    for threshold in [0.35, 0.40, 0.45, 0.50]:
        # Recalculate predictions with new threshold
        for r in all_results:
            r['predicted'] = 'fake' if r['aggregate_score'] >= threshold else 'real'
            r['correct'] = (r['true_label'] == 'fake' and r['predicted'] == 'fake') or \
                           (r['true_label'] == 'real' and r['predicted'] == 'real')

        print_analysis(all_results, threshold)

    # Save detailed results
    with open('/tmp/forensic_eval_results.json', 'w') as f:
        json.dump(all_results, f, indent=2)
    print(f"\nDetailed results saved to /tmp/forensic_eval_results.json")

if __name__ == "__main__":
    main()
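
Because every per-image result (including aggregate_score and true_label) is dumped to /tmp/forensic_eval_results.json, further thresholds can be explored afterwards without re-running the detector. A small follow-up sketch, assuming the script above has already been run:

import json
import numpy as np

# Reload cached scores and labels from the saved evaluation results.
with open('/tmp/forensic_eval_results.json') as f:
    results = json.load(f)

scores = np.array([r['aggregate_score'] for r in results])
labels = np.array([1 if r['true_label'] == 'fake' else 0 for r in results])

for t in np.arange(0.30, 0.60, 0.05):
    acc = np.mean((scores >= t) == labels)
    print(f"threshold={t:.2f}  accuracy={acc * 100:.1f}%")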
improved_detector.py
ADDED
@@ -0,0 +1,407 @@
#!/usr/bin/env python3
"""
Improved Forensic Detector - optimized for Flux-generated images.
Based on empirical analysis of ai_generated_v2 dataset.

Key findings from analysis:
- DCT high-frequency energy: Real > Fake (most discriminative)
- Local variance: Real > Fake (more texture detail)
- Saturation: Real > Fake
- Brightness: Real < Fake

Strategy: Focus on the most discriminative features, combine with proper weighting.
"""

import cv2
import numpy as np
from PIL import Image
from typing import Dict
import tempfile
import os


class ImprovedForensicDetector:
    """Optimized detector for AI-generated real estate images."""

    def __init__(self):
        pass

    def analyze(self, image_path: str) -> Dict:
        """Run all forensic analyses on an image."""
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not load image: {image_path}")

        results = {}

        # === CORE FEATURES (most discriminative) ===

        # 1. DCT High-Frequency Analysis (BEST discriminator)
        results["dct_hf_score"] = self._dct_high_freq_analysis(img)

        # 2. Local Variance Analysis (second best)
        results["local_variance_score"] = self._local_variance_analysis(img)

        # 3. Saturation Analysis
        results["saturation_score"] = self._saturation_analysis(img)

        # 4. Brightness Analysis
        results["brightness_score"] = self._brightness_analysis(img)

        # === SUPPORTING FEATURES ===

        # 5. Texture complexity
        results["texture_complexity_score"] = self._texture_complexity(img)

        # 6. Noise pattern analysis
        results["noise_pattern_score"] = self._noise_pattern_analysis(img)

        # 7. Gradient distribution
        results["gradient_score"] = self._gradient_distribution(img)

        # 8. Color channel consistency
        results["color_channel_score"] = self._color_channel_analysis(img)

        # === AGGREGATION ===
        # All scores are now: 0 = likely real, 1 = likely fake

        # Weights based on discriminative power from analysis
        weights = {
            "dct_hf_score": 0.25,             # Best discriminator
            "local_variance_score": 0.20,     # Second best
            "saturation_score": 0.15,         # Good discriminator
            "brightness_score": 0.10,         # Moderate
            "texture_complexity_score": 0.12,
            "noise_pattern_score": 0.08,
            "gradient_score": 0.05,
            "color_channel_score": 0.05,
        }

        results["aggregate_score"] = sum(
            results[k] * weights[k] for k in weights
        )

        return results

    def _dct_high_freq_analysis(self, img: np.ndarray) -> float:
        """
        DCT high-frequency energy analysis.

        Real images have MORE high-frequency DCT content.
        Fake images are smoother, less HF energy.

        Lower HF energy = more likely fake.
        """
        ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
        y_channel = ycrcb[:, :, 0].astype(np.float32)
        h, w = y_channel.shape

        h8, w8 = (h // 8) * 8, (w // 8) * 8
        if h8 < 16 or w8 < 16:
            return 0.5

        y_cropped = y_channel[:h8, :w8]

        hf_energies = []
        total_energies = []

        for i in range(0, h8, 8):
            for j in range(0, w8, 8):
                block = y_cropped[i:i+8, j:j+8]
                dct = cv2.dct(block)

                # High frequency: bottom-right quadrant of 8x8 DCT
                hf_energy = np.mean(np.abs(dct[4:, 4:]))
                # Total energy for normalization
                total_energy = np.mean(np.abs(dct))

                hf_energies.append(hf_energy)
                total_energies.append(total_energy)

        mean_hf = np.mean(hf_energies)

        # From analysis: Real ~1.86, Fake ~0.89
        # Score: lower HF = higher fake score
        if mean_hf < 0.5:
            score = 0.9   # Very low HF, likely fake
        elif mean_hf < 1.0:
            score = 0.7
        elif mean_hf < 1.5:
            score = 0.5
        elif mean_hf < 2.0:
            score = 0.3
        else:
            score = 0.15  # High HF, likely real

        return float(np.clip(score, 0, 1))

    def _local_variance_analysis(self, img: np.ndarray) -> float:
        """
        Local variance analysis.

        Real images have MORE local variance (more texture detail).
        Fake images tend to be smoother.

        Lower variance = more likely fake.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)

        kernel_size = 15
        local_mean = cv2.blur(gray, (kernel_size, kernel_size))
        local_sqr_mean = cv2.blur(gray ** 2, (kernel_size, kernel_size))
        local_var = local_sqr_mean - local_mean ** 2

        mean_local_var = np.mean(local_var)
        std_local_var = np.std(local_var)

        # From analysis: Real ~514, Fake ~412
        # Score: lower variance = higher fake score
        if mean_local_var < 300:
            score = 0.8   # Very smooth
        elif mean_local_var < 400:
            score = 0.65
        elif mean_local_var < 500:
            score = 0.45  # Borderline
        elif mean_local_var < 600:
            score = 0.3
        else:
            score = 0.15  # High variance, likely real

        # Also consider variance of variance (texture complexity)
        if std_local_var < 700:
            score = min(score + 0.1, 1.0)  # Less varied = more suspicious

        return float(np.clip(score, 0, 1))

    def _saturation_analysis(self, img: np.ndarray) -> float:
        """
        Saturation analysis.

        Real images tend to be MORE saturated.
        Fake images often have lower/inconsistent saturation.
        """
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        saturation = hsv[:, :, 1]

        sat_mean = np.mean(saturation)
        sat_std = np.std(saturation)

        # From analysis: Real ~95, Fake ~76
        # Lower saturation = more likely fake
        if sat_mean < 60:
            score = 0.75
        elif sat_mean < 80:
            score = 0.55
        elif sat_mean < 100:
            score = 0.35
        else:
            score = 0.2

        return float(np.clip(score, 0, 1))

    def _brightness_analysis(self, img: np.ndarray) -> float:
        """
        Brightness analysis.

        Fake images tend to be BRIGHTER.
        Real: ~112, Fake: ~128
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        mean_brightness = np.mean(gray)

        # Higher brightness = more likely fake
        if mean_brightness > 140:
            score = 0.7
        elif mean_brightness > 125:
            score = 0.55
        elif mean_brightness > 110:
            score = 0.4
        else:
            score = 0.25

        return float(np.clip(score, 0, 1))

    def _texture_complexity(self, img: np.ndarray) -> float:
        """
        Texture complexity using gradient analysis.

        Real images: more varied gradients
        Fake images: smoother gradients
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)

        # Sobel gradients
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_mag = np.sqrt(sobelx ** 2 + sobely ** 2)

        # Gradient statistics
        grad_mean = np.mean(gradient_mag)
        grad_std = np.std(gradient_mag)

        # Coefficient of variation of gradients
        grad_cv = grad_std / (grad_mean + 1e-10)

        # Low gradient CV = uniform gradients = suspicious
        if grad_cv < 1.5:
            score = 0.7
        elif grad_cv < 2.0:
            score = 0.5
        else:
            score = 0.3

        return float(np.clip(score, 0, 1))

    def _noise_pattern_analysis(self, img: np.ndarray) -> float:
        """
        Noise pattern analysis.

        Real images: stochastic sensor noise
        Fake images: structured/uniform noise
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)

        # Extract noise
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        noise = gray - blurred

        noise_std = np.std(noise)

        # Analyze noise uniformity across regions
        h, w = noise.shape
        block_h, block_w = h // 4, w // 4

        region_stds = []
        for i in range(4):
            for j in range(4):
                if block_h > 0 and block_w > 0:
                    block = noise[i*block_h:(i+1)*block_h, j*block_w:(j+1)*block_w]
                    if block.size > 0:
                        region_stds.append(np.std(block))

        if len(region_stds) < 4:
            return 0.5

        # Coefficient of variation of regional noise stds
        cv = np.std(region_stds) / (np.mean(region_stds) + 1e-10)

        # Very uniform noise = suspicious (AI generates uniform noise)
        if cv < 0.2:
            score = 0.7   # Too uniform
        elif cv < 0.3:
            score = 0.5
        elif cv < 0.5:
            score = 0.35
        else:
            score = 0.2   # Natural variation

        # Also check absolute noise level
        if noise_std < 4:
            score = max(score, 0.6)  # Very low noise suspicious

        return float(np.clip(score, 0, 1))

    def _gradient_distribution(self, img: np.ndarray) -> float:
        """
        Gradient distribution analysis.

        Checks for unusual gradient patterns.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)

        # Edges
        edges = cv2.Canny(gray.astype(np.uint8), 50, 150)
        edge_density = np.mean(edges > 0)

        # From analysis: Real ~0.107, Fake ~0.096
        # Lower edge density = slightly more suspicious
        if edge_density < 0.05:
            score = 0.65
        elif edge_density < 0.08:
            score = 0.5
        elif edge_density < 0.12:
            score = 0.4
        else:
            score = 0.3

        return float(np.clip(score, 0, 1))

    def _color_channel_analysis(self, img: np.ndarray) -> float:
        """
        Color channel consistency analysis.
        """
        b, g, r = cv2.split(img)

        def get_noise_std(channel):
            blurred = cv2.GaussianBlur(channel, (5, 5), 0)
            noise = channel.astype(np.float32) - blurred.astype(np.float32)
            return np.std(noise)

        r_noise = get_noise_std(r)
        g_noise = get_noise_std(g)
        b_noise = get_noise_std(b)

        # Coefficient of variation of noise across channels
        noise_cv = np.std([r_noise, g_noise, b_noise]) / (np.mean([r_noise, g_noise, b_noise]) + 1e-10)

        if noise_cv > 0.3:
            score = 0.65  # High variation suspicious
        elif noise_cv > 0.15:
            score = 0.45
        else:
            score = 0.3

        return float(np.clip(score, 0, 1))


# Test if run directly
if __name__ == "__main__":
    import sys
    from glob import glob
    import os

    detector = ImprovedForensicDetector()
    data_dir = "data/ai_generated_v2"

    images = glob(os.path.join(data_dir, "*.png"))

    real_scores = []
    fake_scores = []

    for img_path in sorted(images):
        filename = os.path.basename(img_path)
        is_fake = "images_fake_" in filename

        try:
            results = detector.analyze(img_path)
            score = results["aggregate_score"]

            if is_fake:
                fake_scores.append(score)
            else:
                real_scores.append(score)

        except Exception as e:
            print(f"Error: {filename}: {e}")

    print("\n" + "="*60)
    print("IMPROVED DETECTOR RESULTS")
    print("="*60)
    print(f"\nReal (n={len(real_scores)}): {np.mean(real_scores):.3f} ± {np.std(real_scores):.3f}")
    print(f"Fake (n={len(fake_scores)}): {np.mean(fake_scores):.3f} ± {np.std(fake_scores):.3f}")

    # Find best threshold
    best_acc = 0
    best_thresh = 0.5

    for thresh in np.arange(0.2, 0.8, 0.01):
        real_correct = sum(1 for s in real_scores if s < thresh)
        fake_correct = sum(1 for s in fake_scores if s >= thresh)
        acc = (real_correct + fake_correct) / (len(real_scores) + len(fake_scores))

        if acc > best_acc:
            best_acc = acc
            best_thresh = thresh

    print(f"\nBest threshold: {best_thresh:.2f}")
    print(f"Best accuracy: {best_acc*100:.1f}%")
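
A minimal usage sketch for the class above (the file name below is hypothetical; any image path works):

from improved_detector import ImprovedForensicDetector

detector = ImprovedForensicDetector()
results = detector.analyze("data/ai_generated_v2/images_fake_000.png")  # hypothetical path
print(f"aggregate: {results['aggregate_score']:.3f}")  # 0 = likely real, 1 = likely fake
for name, value in sorted(results.items()):
    if name != "aggregate_score":
        print(f"  {name}: {value:.3f}")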
optimized_detector.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Optimized Forensic Detector - based on research and empirical analysis.
|
| 4 |
+
|
| 5 |
+
Key insight from RESEARCH.md:
|
| 6 |
+
- Diffusion models show artifacts at periods 2, 4, 8
|
| 7 |
+
- AI images are smoother, lack high-frequency details
|
| 8 |
+
- DCT HF energy is the best single discriminator
|
| 9 |
+
|
| 10 |
+
From feature analysis:
|
| 11 |
+
- DCT HF mean: Real=1.86ยฑ1.70, Fake=0.89ยฑ1.01 (separation=0.357)
|
| 12 |
+
- Local variance: Real=514ยฑ332, Fake=412ยฑ222 (separation=0.185)
|
| 13 |
+
- Saturation: Real=95ยฑ42, Fake=76ยฑ45 (separation=0.222)
|
| 14 |
+
|
| 15 |
+
Strategy: Use z-score normalization and sigmoid scoring for continuous output.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import cv2
|
| 19 |
+
import numpy as np
|
| 20 |
+
from scipy import ndimage
|
| 21 |
+
from typing import Dict, Tuple
|
| 22 |
+
import warnings
|
| 23 |
+
warnings.filterwarnings('ignore')
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class OptimizedForensicDetector:
|
| 27 |
+
"""Optimized detector using research-backed features."""
|
| 28 |
+
|
| 29 |
+
# Empirical distributions from ai_generated_v2 dataset
|
| 30 |
+
STATS = {
|
| 31 |
+
'dct_hf': {'real_mean': 1.86, 'real_std': 1.70, 'fake_mean': 0.89, 'fake_std': 1.01},
|
| 32 |
+
'local_var': {'real_mean': 514, 'real_std': 332, 'fake_mean': 412, 'fake_std': 222},
|
| 33 |
+
'saturation': {'real_mean': 95, 'real_std': 42, 'fake_mean': 76, 'fake_std': 45},
|
| 34 |
+
'brightness': {'real_mean': 112, 'real_std': 19, 'fake_mean': 128, 'fake_std': 38},
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
def __init__(self):
|
| 38 |
+
pass
|
| 39 |
+
|
| 40 |
+
def analyze(self, image_path: str) -> Dict:
|
| 41 |
+
"""Analyze image and return fake probability."""
|
| 42 |
+
img = cv2.imread(image_path)
|
| 43 |
+
if img is None:
|
| 44 |
+
raise ValueError(f"Could not load image: {image_path}")
|
| 45 |
+
|
| 46 |
+
results = {}
|
| 47 |
+
|
| 48 |
+
# Extract raw features
|
| 49 |
+
dct_hf = self._extract_dct_hf(img)
|
| 50 |
+
local_var = self._extract_local_variance(img)
|
| 51 |
+
saturation = self._extract_saturation(img)
|
| 52 |
+
brightness = self._extract_brightness(img)
|
| 53 |
+
|
| 54 |
+
results['dct_hf_raw'] = dct_hf
|
| 55 |
+
results['local_var_raw'] = local_var
|
| 56 |
+
results['saturation_raw'] = saturation
|
| 57 |
+
results['brightness_raw'] = brightness
|
| 58 |
+
|
| 59 |
+
# Convert to fake probability using likelihood ratio
|
| 60 |
+
# P(fake|feature) โ P(feature|fake) / P(feature|real)
|
| 61 |
+
|
| 62 |
+
dct_score = self._feature_to_score(dct_hf, 'dct_hf', invert=True) # Lower = more fake
|
| 63 |
+
var_score = self._feature_to_score(local_var, 'local_var', invert=True) # Lower = more fake
|
| 64 |
+
sat_score = self._feature_to_score(saturation, 'saturation', invert=True) # Lower = more fake
|
| 65 |
+
bright_score = self._feature_to_score(brightness, 'brightness', invert=False) # Higher = more fake
|
| 66 |
+
|
| 67 |
+
results['dct_hf_score'] = dct_score
|
| 68 |
+
results['local_var_score'] = var_score
|
| 69 |
+
results['saturation_score'] = sat_score
|
| 70 |
+
results['brightness_score'] = bright_score
|
| 71 |
+
|
| 72 |
+
# Weighted combination - based on separation scores
|
| 73 |
+
# DCT HF has best separation (0.357), then saturation (0.222), then local_var (0.185)
|
| 74 |
+
weights = {
|
| 75 |
+
'dct': 0.45, # Best discriminator
|
| 76 |
+
'sat': 0.25, # Second best
|
| 77 |
+
'var': 0.20, # Third
|
| 78 |
+
'bright': 0.10, # Weakest
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
aggregate = (
|
| 82 |
+
weights['dct'] * dct_score +
|
| 83 |
+
weights['sat'] * sat_score +
|
| 84 |
+
weights['var'] * var_score +
|
| 85 |
+
weights['bright'] * bright_score
|
| 86 |
+
)
|
| 87 |
+
|
| 88 |
+
results['aggregate_score'] = float(np.clip(aggregate, 0, 1))
|
| 89 |
+
|
| 90 |
+
return results
|
| 91 |
+
|
| 92 |
+
def _extract_dct_hf(self, img: np.ndarray) -> float:
|
| 93 |
+
"""Extract DCT high-frequency energy."""
|
| 94 |
+
ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
|
| 95 |
+
y = ycrcb[:, :, 0].astype(np.float32)
|
| 96 |
+
h, w = y.shape
|
| 97 |
+
|
| 98 |
+
h8, w8 = (h // 8) * 8, (w // 8) * 8
|
| 99 |
+
if h8 < 16 or w8 < 16:
|
| 100 |
+
return 1.0 # Default to neutral
|
| 101 |
+
|
| 102 |
+
y = y[:h8, :w8]
|
| 103 |
+
hf_energies = []
|
| 104 |
+
|
| 105 |
+
for i in range(0, h8, 8):
|
| 106 |
+
for j in range(0, w8, 8):
|
| 107 |
+
block = y[i:i+8, j:j+8]
|
| 108 |
+
dct = cv2.dct(block)
|
| 109 |
+
# High frequency: bottom-right 4x4 of 8x8 DCT
|
| 110 |
+
hf_energy = np.mean(np.abs(dct[4:, 4:]))
|
| 111 |
+
hf_energies.append(hf_energy)
|
| 112 |
+
|
| 113 |
+
return float(np.mean(hf_energies))
|
| 114 |
+
|
| 115 |
+
def _extract_local_variance(self, img: np.ndarray) -> float:
|
| 116 |
+
"""Extract mean local variance (texture complexity)."""
|
| 117 |
+
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)
|
| 118 |
+
|
| 119 |
+
kernel_size = 15
|
| 120 |
+
local_mean = cv2.blur(gray, (kernel_size, kernel_size))
|
| 121 |
+
local_sqr_mean = cv2.blur(gray ** 2, (kernel_size, kernel_size))
|
| 122 |
+
local_var = local_sqr_mean - local_mean ** 2
|
| 123 |
+
|
| 124 |
+
return float(np.mean(local_var))
|
| 125 |
+
|
| 126 |
+
def _extract_saturation(self, img: np.ndarray) -> float:
|
| 127 |
+
"""Extract mean saturation."""
|
| 128 |
+
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
|
| 129 |
+
        return float(np.mean(hsv[:, :, 1]))

    def _extract_brightness(self, img: np.ndarray) -> float:
        """Extract mean brightness."""
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return float(np.mean(gray))

    def _feature_to_score(self, value: float, feature: str, invert: bool) -> float:
        """
        Convert a raw feature value to a fake probability using a likelihood ratio.

        Uses a Gaussian assumption:
            score = P(value|fake) / (P(value|fake) + P(value|real))

        If invert=True, lower values indicate fake (so the logic is flipped).
        """
        stats = self.STATS[feature]

        # Compute log-likelihoods (Gaussian PDF; only the ratio matters,
        # so the normalization constant is dropped)
        def gaussian_log_likelihood(x, mean, std):
            if std < 1e-6:
                std = 1e-6
            return -0.5 * ((x - mean) / std) ** 2

        ll_fake = gaussian_log_likelihood(value, stats['fake_mean'], stats['fake_std'])
        ll_real = gaussian_log_likelihood(value, stats['real_mean'], stats['real_std'])

        # Softmax to get a probability:
        # P(fake) = exp(ll_fake) / (exp(ll_fake) + exp(ll_real))
        #         = 1 / (1 + exp(ll_real - ll_fake))
        diff = ll_real - ll_fake

        # Clip to avoid overflow in exp()
        diff = np.clip(diff, -20, 20)

        score = 1.0 / (1.0 + np.exp(diff))

        return float(score)


def evaluate_detector():
    """Evaluate on the dataset."""
    from glob import glob
    import os

    detector = OptimizedForensicDetector()
    data_dir = "data/ai_generated_v2"

    images = glob(os.path.join(data_dir, "*.png"))

    real_scores = []
    fake_scores = []

    for img_path in sorted(images):
        filename = os.path.basename(img_path)
        is_fake = "images_fake_" in filename

        try:
            results = detector.analyze(img_path)
            score = results["aggregate_score"]

            if is_fake:
                fake_scores.append(score)
            else:
                real_scores.append(score)

        except Exception as e:
            print(f"Error: {filename}: {e}")

    print("\n" + "=" * 60)
    print("OPTIMIZED DETECTOR RESULTS (Likelihood Ratio)")
    print("=" * 60)
    print(f"\nReal (n={len(real_scores)}): {np.mean(real_scores):.3f} ± {np.std(real_scores):.3f}")
    print(f"Fake (n={len(fake_scores)}): {np.mean(fake_scores):.3f} ± {np.std(fake_scores):.3f}")
    print(f"Separation: {np.mean(fake_scores) - np.mean(real_scores):.3f}")

    # Find the best threshold
    best_acc = 0
    best_thresh = 0.5
    best_f1 = 0

    all_scores = real_scores + fake_scores
    all_labels = [0] * len(real_scores) + [1] * len(fake_scores)

    for thresh in np.arange(0.2, 0.8, 0.01):
        tp = sum(1 for s, l in zip(all_scores, all_labels) if s >= thresh and l == 1)
        tn = sum(1 for s, l in zip(all_scores, all_labels) if s < thresh and l == 0)
        fp = sum(1 for s, l in zip(all_scores, all_labels) if s >= thresh and l == 0)
        fn = sum(1 for s, l in zip(all_scores, all_labels) if s < thresh and l == 1)

        acc = (tp + tn) / (tp + tn + fp + fn)
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

        if acc > best_acc:
            best_acc = acc
            best_thresh = thresh
        if f1 > best_f1:
            best_f1 = f1

    print(f"\nBest threshold: {best_thresh:.2f}")
    print(f"Best accuracy: {best_acc*100:.1f}%")
    print(f"Best F1: {best_f1:.3f}")

    # Per-feature analysis
    print("\n" + "=" * 60)
    print("PER-FEATURE PERFORMANCE")
    print("=" * 60)

    for feature in ['dct_hf', 'local_var', 'saturation', 'brightness']:
        real_feat = []
        fake_feat = []

        for img_path in sorted(images):
            filename = os.path.basename(img_path)
            is_fake = "images_fake_" in filename

            try:
                results = detector.analyze(img_path)
                score = results[f"{feature}_score"]

                if is_fake:
                    fake_feat.append(score)
                else:
                    real_feat.append(score)
            except Exception:
                pass

        # Find the best accuracy for this feature alone
        all_feat = real_feat + fake_feat
        best_feat_acc = 0
        for thresh in np.arange(0.2, 0.8, 0.01):
            correct = sum(1 for s in real_feat if s < thresh) + sum(1 for s in fake_feat if s >= thresh)
            acc = correct / len(all_feat)
            if acc > best_feat_acc:
                best_feat_acc = acc

        print(f"{feature:12s}: Real={np.mean(real_feat):.3f}, Fake={np.mean(fake_feat):.3f}, "
              f"Sep={np.mean(fake_feat)-np.mean(real_feat):.3f}, Acc={best_feat_acc*100:.1f}%")


if __name__ == "__main__":
    evaluate_detector()
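For readers skimming the diff, here is a minimal standalone sketch of the likelihood-ratio scoring used in _feature_to_score above. The stats dict mirrors the shape the method reads from self.STATS; the dct_hf numbers are the means/stds quoted in simple_detector.py later in this commit, and everything else is illustrative:

import numpy as np

# Hypothetical stats in the shape _feature_to_score expects; the dct_hf
# numbers are the ones reported elsewhere in this commit.
STATS = {'dct_hf': {'real_mean': 1.86, 'real_std': 1.70,
                    'fake_mean': 0.89, 'fake_std': 1.01}}

def feature_to_score(value, feature):
    s = STATS[feature]
    ll_real = -0.5 * ((value - s['real_mean']) / max(s['real_std'], 1e-6)) ** 2
    ll_fake = -0.5 * ((value - s['fake_mean']) / max(s['fake_std'], 1e-6)) ** 2
    diff = np.clip(ll_real - ll_fake, -20, 20)  # clip to avoid exp() overflow
    return 1.0 / (1.0 + np.exp(diff))           # P(fake | value)

print(feature_to_score(0.9, 'dct_hf'))  # ~0.54: at the fake mean, weak evidence of fake
print(feature_to_score(3.0, 'dct_hf'))  # ~0.12: far above the fake mean, leans real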
predict.py
ADDED
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""
Digital Integrity Challenge - Track B: Real Estate
Detecting AI-generated/manipulated property images

Usage:
    python predict.py --input_dir /test_images --output_file predictions.json
    python predict.py --image /path/to/image.jpg --output_file predictions.json
"""

import argparse
import json
import os
from pathlib import Path
from typing import Dict, List

from src.forensics.detector import ForensicDetector
from src.vlm.reasoner import VLMReasoner
from src.fusion.combiner import FusionModule


def process_image(image_path: str, forensic: ForensicDetector, vlm: VLMReasoner, fusion: FusionModule) -> Dict:
    """Process a single image and return its prediction."""

    # Module 1: Forensic analysis
    forensic_results = forensic.analyze(image_path)

    # Module 2: VLM reasoning
    vlm_results = vlm.analyze(image_path)

    # Fusion: combine the results
    final_result = fusion.combine(forensic_results, vlm_results)

    return {
        "image_name": os.path.basename(image_path),
        "authenticity_score": final_result["score"],
        "manipulation_type": final_result["manipulation_type"],
        "vlm_reasoning": final_result["reasoning"],
        "details": {
            "forensic_score": final_result["forensic_score"],
            "vlm_score": final_result["vlm_score"],
            "forensic_breakdown": {
                "fft": forensic_results.get("fft_score", 0),
                "ela": forensic_results.get("ela_score", 0),
                "noise": forensic_results.get("noise_score", 0),
                "texture": forensic_results.get("texture_score", 0),
                "compression": forensic_results.get("compression_score", 0),
                "edge": forensic_results.get("edge_score", 0),
                "sharpness": forensic_results.get("sharpness_score", 0),
                "rich_poor_texture": forensic_results.get("rich_poor_texture_score", 0),
                "color_consistency": forensic_results.get("color_consistency_score", 0),
                "lbp": forensic_results.get("lbp_score", 0),
                "glcm": forensic_results.get("glcm_score", 0),
            }
        }
    }


def main():
    parser = argparse.ArgumentParser(description="Detect AI-generated/manipulated real estate images")
    parser.add_argument("--input_dir", type=str, help="Directory containing images to analyze")
    parser.add_argument("--image", type=str, help="Single image to analyze")
    parser.add_argument("--output_file", type=str, default="predictions.json", help="Output JSON file")
    parser.add_argument("--vlm_backend", type=str, default="auto", help="VLM backend: auto, qwen2vl, blip2, mock")
    args = parser.parse_args()

    if not args.input_dir and not args.image:
        parser.error("Either --input_dir or --image must be provided")

    # Initialize the modules
    print("Loading models...")
    forensic = ForensicDetector()
    vlm = VLMReasoner(backend=args.vlm_backend)
    fusion = FusionModule()

    # Collect the images to process
    images = []
    if args.image:
        images = [Path(args.image)]
    else:
        input_path = Path(args.input_dir)
        image_extensions = {'.jpg', '.jpeg', '.png', '.webp', '.tif', '.tiff', '.bmp'}
        # Recursively find all images
        images = [f for f in input_path.rglob('*') if f.suffix.lower() in image_extensions]

    print(f"Found {len(images)} images to process")

    # Process each image
    predictions = []
    for idx, img_path in enumerate(images):
        print(f"[{idx + 1}/{len(images)}] Processing: {img_path.name}")
        try:
            result = process_image(str(img_path), forensic, vlm, fusion)
            predictions.append(result)

            # Print a summary
            score = result["authenticity_score"]
            manip_type = result["manipulation_type"]
            verdict = "LIKELY REAL" if score < 0.4 else ("UNCERTAIN" if score < 0.6 else "LIKELY MANIPULATED")
            print(f"  Score: {score:.3f} ({verdict}) - Type: {manip_type}")

        except Exception as e:
            print(f"  Error processing {img_path.name}: {e}")
            predictions.append({
                "image_name": img_path.name,
                "authenticity_score": 0.5,
                "manipulation_type": "error",
                "vlm_reasoning": f"Error during analysis: {str(e)}",
                "details": {}
            })

    # Save the predictions
    output_path = Path(args.output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        json.dump(predictions, f, indent=2)

    print(f"\nPredictions saved to {output_path}")

    # Print summary statistics
    if predictions:
        scores = [p["authenticity_score"] for p in predictions if "authenticity_score" in p]
        if scores:
            print("\n=== Summary ===")
            print(f"Total images: {len(predictions)}")
            print(f"Average score: {sum(scores) / len(scores):.3f}")
            print(f"Likely real (score < 0.4): {sum(1 for s in scores if s < 0.4)}")
            print(f"Uncertain (0.4-0.6): {sum(1 for s in scores if 0.4 <= s < 0.6)}")
            print(f"Likely manipulated (score >= 0.6): {sum(1 for s in scores if s >= 0.6)}")


if __name__ == "__main__":
    main()
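A minimal usage sketch for the file this script writes (the field names follow the dict built in process_image above; the path assumes the default --output_file):

import json

with open("predictions.json") as f:
    predictions = json.load(f)

# Rank images from most to least suspicious.
for p in sorted(predictions, key=lambda p: p["authenticity_score"], reverse=True):
    print(f'{p["image_name"]}: {p["authenticity_score"]:.3f} ({p["manipulation_type"]})')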
requirements.txt
ADDED
@@ -0,0 +1,22 @@
# Core dependencies
numpy>=1.24.0
opencv-python>=4.8.0
Pillow>=10.0.0
scipy>=1.11.0
tqdm>=4.65.0

# Deep learning (for local VLM fallback)
torch>=2.0.0
transformers>=4.37.0
accelerate>=0.25.0

# Qwen2-VL local model (default fallback - no API key needed)
qwen-vl-utils

# GPU optimization (optional, for 4-bit quantization)
# bitsandbytes>=0.41.0  # Optional: uncomment for 4-bit quantization on CUDA GPUs

# API-based VLM options (faster, if API keys available)
google-generativeai>=0.3.0
anthropic>=0.18.0
openai>=1.0.0
simple_detector.py
ADDED
@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Simple optimized detector - DCT HF focus.
Real=1.86, Fake=0.89 for DCT HF mean.
"""
import cv2
import numpy as np
from glob import glob
import os


def extract_dct_hf(img):
    """Extract DCT high-frequency energy."""
    ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    y = ycrcb[:, :, 0].astype(np.float32)
    h, w = y.shape
    h8, w8 = (h // 8) * 8, (w // 8) * 8
    if h8 < 16 or w8 < 16:
        return 1.0
    y = y[:h8, :w8]
    hf_energies = []
    for i in range(0, h8, 8):
        for j in range(0, w8, 8):
            block = y[i:i+8, j:j+8]
            dct = cv2.dct(block)
            hf_energy = np.mean(np.abs(dct[4:, 4:]))
            hf_energies.append(hf_energy)
    return float(np.mean(hf_energies))


def extract_local_var(img):
    """Mean local variance over a 15x15 window."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)
    local_mean = cv2.blur(gray, (15, 15))
    local_sqr = cv2.blur(gray ** 2, (15, 15))
    local_var = local_sqr - local_mean ** 2
    return float(np.mean(local_var))


def extract_saturation(img):
    """Mean HSV saturation."""
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    return float(np.mean(hsv[:, :, 1]))


# Stats from analysis: (real_mean, real_std, fake_mean, fake_std)
STATS = {
    'dct': (1.86, 1.70, 0.89, 1.01),
    'var': (514, 332, 412, 222),
    'sat': (95, 42, 76, 45),
}


def likelihood_score(val, stat):
    """P(fake|val) using a Gaussian likelihood ratio."""
    rm, rs, fm, fs = stat
    ll_real = -0.5 * ((val - rm) / max(rs, 1)) ** 2
    ll_fake = -0.5 * ((val - fm) / max(fs, 1)) ** 2
    diff = np.clip(ll_real - ll_fake, -20, 20)
    return 1.0 / (1.0 + np.exp(diff))


# Evaluate
data_dir = "data/ai_generated_v2"
images = glob(os.path.join(data_dir, "*.png"))

real_scores, fake_scores = [], []

for img_path in images:
    filename = os.path.basename(img_path)
    is_fake = "images_fake_" in filename

    img = cv2.imread(img_path)
    if img is None:
        continue

    dct_hf = extract_dct_hf(img)
    local_var = extract_local_var(img)
    sat = extract_saturation(img)

    # Weighted scores (DCT is the best single feature)
    score = (
        0.50 * likelihood_score(dct_hf, STATS['dct']) +
        0.30 * likelihood_score(sat, STATS['sat']) +
        0.20 * likelihood_score(local_var, STATS['var'])
    )

    if is_fake:
        fake_scores.append(score)
    else:
        real_scores.append(score)

print("=" * 50)
print("SIMPLE DETECTOR RESULTS")
print("=" * 50)
print(f"Real (n={len(real_scores)}): {np.mean(real_scores):.3f} ± {np.std(real_scores):.3f}")
print(f"Fake (n={len(fake_scores)}): {np.mean(fake_scores):.3f} ± {np.std(fake_scores):.3f}")
print(f"Separation: {np.mean(fake_scores) - np.mean(real_scores):.3f}")

# Best threshold
best_acc, best_thresh = 0, 0.5
for thresh in np.arange(0.3, 0.7, 0.01):
    correct = sum(1 for s in real_scores if s < thresh) + sum(1 for s in fake_scores if s >= thresh)
    acc = correct / (len(real_scores) + len(fake_scores))
    if acc > best_acc:
        best_acc, best_thresh = acc, thresh

print(f"\nBest threshold: {best_thresh:.2f}")
print(f"Best accuracy: {best_acc*100:.1f}%")
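A quick sanity check of the DCT feature, runnable with extract_dct_hf as defined above (the two synthetic frames are stand-ins: per-pixel noise for a camera photo, heavy blur for AI-style smoothness):

import cv2
import numpy as np

rng = np.random.default_rng(0)
# "Camera-like" frame: mid-gray plus iid pixel noise.
noisy = np.clip(128 + rng.normal(0, 12, (128, 128, 3)), 0, 255).astype(np.uint8)
# "AI-like" frame: the same frame with its high frequencies smoothed away.
smooth = cv2.GaussianBlur(noisy, (15, 15), 0)

print(extract_dct_hf(noisy))   # well above 1: strong high-frequency energy
print(extract_dct_hf(smooth))  # near 0: the blur removed the HF content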
src/__init__.py
ADDED
File without changes

src/forensics/__init__.py
ADDED
File without changes
src/forensics/detector.py
ADDED
@@ -0,0 +1,946 @@
"""
Module 1: Forensic Signal Detector
Pixel-level analysis for detecting AI manipulation
"""

import cv2
import numpy as np
from PIL import Image
from typing import Dict
import tempfile
import os


class ForensicDetector:
    """Detects low-level technical anomalies in images."""

    def __init__(self):
        self.ela_quality = 90  # JPEG quality for ELA

    def analyze(self, image_path: str) -> Dict:
        """Run all forensic analyses on an image."""
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError(f"Could not load image: {image_path}")

        results = {
            "fft_score": self._fft_analysis(img),
            "ela_score": self._ela_analysis(image_path),
            "noise_score": self._noise_analysis(img),
            "texture_score": self._texture_consistency(img),
            "compression_score": self._compression_analysis(image_path),
            "edge_score": self._edge_coherence(img),
            "sharpness_score": self._sharpness_analysis(img),
            "rich_poor_texture_score": self._rich_poor_texture_contrast(img),
            "color_consistency_score": self._color_channel_analysis(img),
            "lbp_score": self._local_binary_pattern_analysis(img),
            "glcm_score": self._glcm_texture_analysis(img),
        }

        # Aggregate forensic score (0 = real, 1 = fake)
        # EMPIRICALLY OPTIMIZED on 12 real + 50 fake test images.
        # Achieves 79.7% balanced accuracy (83% real, 76% fake).

        # Directions: -1 means invert (higher raw score = more REAL),
        #             +1 means keep (higher raw score = more FAKE)
        directions = {
            "fft_score": -1,                # higher raw = REAL, so invert
            "ela_score": -1,                # higher raw = REAL, so invert
            "noise_score": 1,               # higher = FAKE (strongest signal)
            "texture_score": 1,             # higher = FAKE
            "compression_score": 1,         # higher = FAKE
            "edge_score": 1,                # higher = FAKE (weak)
            "sharpness_score": 1,           # higher = FAKE
            "rich_poor_texture_score": -1,  # higher = REAL, so invert
            "color_consistency_score": 1,   # higher = FAKE
            "lbp_score": -1,                # higher = REAL, so invert
            "glcm_score": 1,                # higher = FAKE (weak)
        }

        # Transform: invert scores where direction = -1
        corrected = {}
        for k, d in directions.items():
            if d == -1:
                corrected[k] = 1.0 - results[k]
            else:
                corrected[k] = results[k]

        # Optimized weights (sum to 1.0)
        weights = {
            "fft_score": 0.15,
            "ela_score": 0.12,
            "noise_score": 0.18,            # Most discriminative
            "texture_score": 0.16,
            "compression_score": 0.05,
            "edge_score": 0.01,             # Least discriminative
            "sharpness_score": 0.16,
            "rich_poor_texture_score": 0.03,
            "color_consistency_score": 0.06,
            "lbp_score": 0.03,
            "glcm_score": 0.05,
        }

        results["aggregate_score"] = sum(
            corrected[k] * weights[k] for k in weights
        )

        return results

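    # Worked example of the aggregation above (hypothetical scores, not
    # measured values): with noise_score = 0.8 (direction +1, kept) and
    # fft_score = 0.3 (direction -1, corrected to 1 - 0.3 = 0.7), those two
    # detectors contribute 0.18 * 0.8 + 0.15 * 0.7 = 0.249 to the aggregate;
    # summing all eleven weighted terms yields a value in [0, 1], where
    # higher means more likely fake.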
    def _fft_analysis(self, img: np.ndarray) -> float:
        """
        FFT analysis to detect GAN/diffusion artifacts.

        Research-based improvements:
        1. Detect periodic artifacts at periods 2, 4, 8, 16 (diffusion fingerprints)
        2. DEFEND-style weighted band analysis (mid-high freq more discriminative)
        3. Radial symmetry analysis
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        h, w = gray.shape

        # Apply the FFT
        f_transform = np.fft.fft2(gray)
        f_shift = np.fft.fftshift(f_transform)
        magnitude = np.abs(f_shift)

        center_h, center_w = h // 2, w // 2

        # === 1. DIFFUSION PERIOD DETECTION ===
        # Diffusion models leave artifacts at periods 2, 4, 8, 16.
        # These appear as spikes at specific frequencies: f = size / period
        period_score = self._detect_periodic_artifacts(magnitude, h, w)

        # === 2. DEFEND-STYLE WEIGHTED BAND ANALYSIS ===
        # Research: mid-high frequencies are most discriminative;
        # low frequencies are similar for real and AI images
        band_score = self._analyze_frequency_bands(magnitude, h, w)

        # === 3. RADIAL SYMMETRY (original) ===
        # AI images often have more symmetric frequency patterns
        log_magnitude = np.log(magnitude + 1)
        mag_norm = (log_magnitude - log_magnitude.min()) / (log_magnitude.max() - log_magnitude.min() + 1e-10)

        dc_radius = min(h, w) // 20
        angles = np.linspace(0, 2 * np.pi, 36)
        radii = np.linspace(dc_radius, min(h, w) // 4, 15)
        radial_profile = []

        for r in radii:
            ring_values = []
            for angle in angles:
                y_coord = int(center_h + r * np.sin(angle))
                x_coord = int(center_w + r * np.cos(angle))
                if 0 <= y_coord < h and 0 <= x_coord < w:
                    ring_values.append(mag_norm[y_coord, x_coord])
            if ring_values:
                radial_profile.append(np.std(ring_values))

        if radial_profile:
            symmetry_score = 1.0 - np.clip(np.mean(radial_profile) * 5, 0, 1)
        else:
            symmetry_score = 0.5

        # === COMBINE SCORES ===
        # Weights: period detection (40%), band analysis (40%), symmetry (20%)
        score = 0.40 * period_score + 0.40 * band_score + 0.20 * symmetry_score

        return float(np.clip(score, 0, 1))

    def _detect_periodic_artifacts(self, magnitude: np.ndarray, h: int, w: int) -> float:
        """
        Detect periodic artifacts at periods 2, 4, 8, 16.

        Diffusion models use upsampling that creates repeating patterns.
        In the frequency domain, a period-P artifact appears at frequency f = N/P,
        where N is the image dimension.
        """
        center_h, center_w = h // 2, w // 2

        # Periods to check (research shows these are common in diffusion models)
        periods = [2, 4, 8, 16]

        # Calculate the expected frequency positions for each period
        artifact_scores = []

        for period in periods:
            # Frequency corresponding to this period
            freq_h = h // period
            freq_w = w // period

            # Check for energy spikes at these frequencies.
            # Look at the cross pattern (horizontal and vertical artifacts).
            positions = [
                (center_h + freq_h, center_w),  # Above center
                (center_h - freq_h, center_w),  # Below center
                (center_h, center_w + freq_w),  # Right of center
                (center_h, center_w - freq_w),  # Left of center
            ]

            # Measure the energy at artifact positions vs nearby background
            artifact_energy = []
            background_energy = []

            for pos_h, pos_w in positions:
                if 0 <= pos_h < h and 0 <= pos_w < w:
                    # Energy at the artifact position (small window)
                    window_size = max(3, min(h, w) // 100)
                    h_start = max(0, pos_h - window_size)
                    h_end = min(h, pos_h + window_size + 1)
                    w_start = max(0, pos_w - window_size)
                    w_end = min(w, pos_w + window_size + 1)

                    artifact_energy.append(np.mean(magnitude[h_start:h_end, w_start:w_end]))

                    # Background: a slightly offset position
                    offset = window_size * 3
                    bg_h = min(h - 1, max(0, pos_h + offset))
                    bg_w = min(w - 1, max(0, pos_w + offset))
                    bg_h_start = max(0, bg_h - window_size)
                    bg_h_end = min(h, bg_h + window_size + 1)
                    bg_w_start = max(0, bg_w - window_size)
                    bg_w_end = min(w, bg_w + window_size + 1)

                    background_energy.append(np.mean(magnitude[bg_h_start:bg_h_end, bg_w_start:bg_w_end]))

            if artifact_energy and background_energy:
                # Ratio of artifact to background energy.
                # High ratio = strong periodic artifact = likely AI.
                ratio = np.mean(artifact_energy) / (np.mean(background_energy) + 1e-10)
                # Normalize: ratio > 1.5 is suspicious
                artifact_scores.append(np.clip((ratio - 1.0) / 1.0, 0, 1))

        if artifact_scores:
            # Take the max score (any period showing artifacts is suspicious)
            return float(max(artifact_scores))
        return 0.0

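    # Concrete instance of the period-to-frequency mapping checked above
    # (illustrative size): in a 512x512 image, a period-8 upsampling artifact
    # sits 512 // 8 = 64 bins from the DC term, so the four probed positions
    # are (center_h +/- 64, center_w) and (center_h, center_w +/- 64) in the
    # shifted spectrum.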
    def _analyze_frequency_bands(self, magnitude: np.ndarray, h: int, w: int) -> float:
        """
        DEFEND-style frequency band analysis.

        Research findings:
        - Low frequencies: similar for real and AI (not discriminative)
        - Mid frequencies: somewhat discriminative
        - High frequencies: most discriminative (AI images are smoother here)

        Real images have more high-frequency content (fine details, sensor noise);
        AI images are smoother in the high frequencies.
        """
        center_h, center_w = h // 2, w // 2
        max_radius = min(h, w) // 2

        # Create a distance map from the center
        y, x = np.ogrid[:h, :w]
        distance = np.sqrt((y - center_h) ** 2 + (x - center_w) ** 2)

        # Define frequency bands (as fractions of the max radius):
        # Low: 0-20%, Mid: 20-50%, High: 50-100%
        low_mask = distance < (max_radius * 0.2)
        mid_mask = (distance >= max_radius * 0.2) & (distance < max_radius * 0.5)
        high_mask = (distance >= max_radius * 0.5) & (distance < max_radius)

        # Calculate the energy in each band
        low_energy = np.mean(magnitude[low_mask]) if np.any(low_mask) else 0
        mid_energy = np.mean(magnitude[mid_mask]) if np.any(mid_mask) else 0
        high_energy = np.mean(magnitude[high_mask]) if np.any(high_mask) else 0

        total_energy = low_energy + mid_energy + high_energy + 1e-10

        # Ratio of high-frequency energy to total:
        # real images have a higher ratio (more fine detail),
        # AI images a lower one (smoother).
        high_ratio = high_energy / total_energy

        # Also check the mid-to-low ratio
        mid_to_low = mid_energy / (low_energy + 1e-10)

        # Score: a low high_ratio is suspicious (AI tends to be smoother).
        # Calibrated thresholds based on testing:
        # - Real images typically have high_ratio > 0.15
        # - AI images typically have high_ratio < 0.10
        # Only flag as suspicious if high_ratio is very low.
        if high_ratio < 0.05:
            smoothness_score = 0.9  # Very smooth - likely AI
        elif high_ratio < 0.10:
            smoothness_score = 0.6  # Suspicious
        elif high_ratio < 0.15:
            smoothness_score = 0.4  # Borderline
        else:
            smoothness_score = 0.2  # Normal - likely real

        # Additional: a very uniform mid-to-low ratio is suspicious
        # (AI tends to have a consistent frequency rolloff)
        uniformity_score = 1.0 - np.clip(abs(mid_to_low - 0.5) * 2, 0, 1)

        # Weight smoothness higher as it is more discriminative
        return float(0.8 * smoothness_score + 0.2 * uniformity_score)

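    # Band-ratio example (hypothetical energies): with low = 900, mid = 80
    # and high = 20, high_ratio = 20 / 1000 = 0.02 < 0.05, so the thresholds
    # above would assign smoothness_score = 0.9 (very smooth, likely AI).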
    def _ela_analysis(self, image_path: str) -> float:
        """
        Error Level Analysis - detects areas with different compression levels.
        Spliced/inpainted regions often have different error levels.
        """
        # Load the original
        original = Image.open(image_path).convert('RGB')

        # Resave at a known quality, using a context manager for cleanup
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=True) as tmp:
            tmp_path = tmp.name
            original.save(tmp_path, 'JPEG', quality=self.ela_quality)
            # Load the resaved image while the temp file still exists
            resaved = Image.open(tmp_path)
            # Force load into memory before the temp file is deleted
            resaved_arr = np.array(resaved, dtype=np.float32)

        # Calculate the difference (temp file auto-cleaned by the context manager)
        orig_arr = np.array(original, dtype=np.float32)

        ela = np.abs(orig_arr - resaved_arr)

        # Analyze the ELA by regions
        h, w = ela.shape[:2]
        block_size = 64
        region_scores = []

        for i in range(0, h - block_size, block_size):
            for j in range(0, w - block_size, block_size):
                region = ela[i:i + block_size, j:j + block_size]
                region_scores.append(np.mean(region))

        if len(region_scores) < 4:
            return 0.5

        # High variance between regions suggests manipulation
        ela_variance = np.std(region_scores) / (np.mean(region_scores) + 1e-10)

        # Also check for unusually high ELA values
        high_ela_ratio = np.mean(ela > 20)

        # Combine the metrics
        variance_score = np.clip(ela_variance / 0.5, 0, 1)
        high_ela_score = np.clip(high_ela_ratio * 10, 0, 1)

        score = 0.6 * variance_score + 0.4 * high_ela_score

        return float(np.clip(score, 0, 1))

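    # Why this works: pixels that already survived JPEG compression change
    # little when recompressed at quality 90, while freshly pasted or
    # inpainted regions compress for the "first" time and stand out in the
    # ELA difference map; the per-64px-block variance above measures exactly
    # that region-to-region imbalance.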
    def _noise_analysis(self, img: np.ndarray) -> float:
        """
        Analyze noise patterns - AI images often have unnatural noise.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)

        # Extract the noise using a high-pass filter
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        noise = gray - blurred

        # Analyze the noise statistics
        noise_std = np.std(noise)

        # Check noise uniformity across image quadrants
        h, w = noise.shape
        regions = [
            noise[:h // 2, :w // 2],
            noise[:h // 2, w // 2:],
            noise[h // 2:, :w // 2],
            noise[h // 2:, w // 2:]
        ]

        region_stds = [np.std(r) for r in regions]
        std_variance = np.std(region_stds)
        std_mean = np.mean(region_stds)

        # Very uniform noise across regions is suspicious (AI images).
        # Coefficient of variation of the region stds:
        cv = std_variance / (std_mean + 1e-10)
        uniformity_score = 1 - np.clip(cv * 3, 0, 1)

        # Check the noise magnitude - too low suggests heavy processing
        noise_magnitude_score = 0
        if noise_std < 2.5:
            noise_magnitude_score = 0.8  # Very smooth = suspicious
        elif noise_std < 5:
            noise_magnitude_score = 0.4
        elif noise_std > 20:
            noise_magnitude_score = 0.3  # Very noisy might be fake too

        # Check noise coherence using autocorrelation
        sample = noise[:min(256, h), :min(256, w)]
        autocorr = np.abs(np.fft.ifft2(np.abs(np.fft.fft2(sample)) ** 2))
        autocorr_score = np.clip(autocorr[1, 1] / (autocorr[0, 0] + 1e-10) * 5, 0, 1)

        score = 0.4 * uniformity_score + 0.3 * noise_magnitude_score + 0.3 * autocorr_score

        return float(np.clip(score, 0, 1))

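    # Note on the autocorrelation step above: by the Wiener-Khinchin theorem,
    # ifft2(|fft2(noise)|^2) is the (circular) autocorrelation of the noise
    # field, so autocorr[0, 0] is its total energy and autocorr[1, 1] the
    # correlation at a one-pixel diagonal shift; a large ratio means the
    # residual "noise" is spatially structured, which is atypical of raw
    # sensor noise.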
    def _texture_consistency(self, img: np.ndarray) -> float:
        """
        Check for unnatural smoothness in textures.
        AI often produces overly smooth surfaces.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Calculate the local variance using a sliding window
        kernel_size = 15
        local_mean = cv2.blur(gray.astype(np.float32), (kernel_size, kernel_size))
        local_sqr_mean = cv2.blur(gray.astype(np.float32) ** 2, (kernel_size, kernel_size))
        local_var = local_sqr_mean - local_mean ** 2

        # Find smooth regions (low variance)
        smooth_threshold = 50  # Lowered threshold
        smooth_ratio = np.mean(local_var < smooth_threshold)

        # Calculate the gradient magnitude for edge analysis
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_mag = np.sqrt(sobelx ** 2 + sobely ** 2)

        # Low overall gradient magnitude suggests artificial smoothing
        gradient_mean = np.mean(gradient_mag)
        gradient_score = 1 - np.clip(gradient_mean / 30, 0, 1)

        # Combine the smooth ratio and gradient analysis
        smooth_score = np.clip((smooth_ratio - 0.2) / 0.5, 0, 1)

        score = 0.5 * smooth_score + 0.5 * gradient_score

        return float(np.clip(score, 0, 1))

    def _rich_poor_texture_contrast(self, img: np.ndarray) -> float:
        """
        Rich/Poor Texture Contrast Analysis (research-based).

        Research finding:
        - Divide the image into "rich texture" patches (high detail: objects, edges)
          and "poor texture" patches (low detail: sky, plain walls).
        - Measure the noise characteristics in each type.
        - Real images: DIFFERENT noise in rich vs poor areas (camera sensor varies).
        - AI images: SIMILAR noise everywhere (uniform generation process).

        A high contrast difference = likely real;
        a low contrast difference = likely AI/manipulated.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY).astype(np.float32)
        h, w = gray.shape

        # === Step 1: Use local variance to identify rich/poor regions ===
        patch_size = 32
        rich_patches = []
        poor_patches = []

        # Threshold for rich vs poor (based on local variance)
        variance_threshold = 500  # Patches with variance above this are "rich"

        for i in range(0, h - patch_size, patch_size):
            for j in range(0, w - patch_size, patch_size):
                patch = gray[i:i + patch_size, j:j + patch_size]
                patch_var = np.var(patch)

                if patch_var > variance_threshold:
                    rich_patches.append(patch)
                elif patch_var < variance_threshold / 3:  # Very smooth patches
                    poor_patches.append(patch)

        # Need a minimum number of patches for a meaningful analysis
        if len(rich_patches) < 3 or len(poor_patches) < 3:
            return 0.5  # Insufficient data

        # === Step 2: Extract noise from the patches ===
        def extract_noise(patch):
            """Extract high-frequency noise from a patch."""
            blurred = cv2.GaussianBlur(patch, (5, 5), 0)
            noise = patch - blurred
            return noise

        rich_noises = [extract_noise(p) for p in rich_patches]
        poor_noises = [extract_noise(p) for p in poor_patches]

        # === Step 3: Measure noise characteristics ===
        # For each patch type, calculate:
        # - the mean noise standard deviation
        # - the inter-pixel correlation

        def noise_stats(noise_patches):
            stds = [np.std(n) for n in noise_patches]
            # Autocorrelation at lag 1 (measures noise structure)
            autocorrs = []
            for n in noise_patches:
                if n.size > 1:
                    flat = n.flatten()
                    if len(flat) > 1 and np.std(flat[:-1]) > 0 and np.std(flat[1:]) > 0:
                        corr = np.corrcoef(flat[:-1], flat[1:])[0, 1]
                        if not np.isnan(corr):
                            autocorrs.append(corr)
            return np.mean(stds), np.mean(autocorrs) if autocorrs else 0

        rich_std, rich_autocorr = noise_stats(rich_noises)
        poor_std, poor_autocorr = noise_stats(poor_noises)

        # === Step 4: Calculate the contrast ===
        # Real images: rich areas have MORE noise than poor areas.
        # AI images: similar noise levels.

        # Noise level contrast
        std_ratio = rich_std / (poor_std + 1e-10)

        # In real images, rich areas typically have 1.2-2x more noise than poor;
        # in AI images, the ratio is closer to 1.0.
        if std_ratio > 1.5:
            std_contrast_score = 0.2  # High contrast = likely real
        elif std_ratio > 1.2:
            std_contrast_score = 0.35
        elif std_ratio > 1.0:
            std_contrast_score = 0.5
        elif std_ratio > 0.8:
            std_contrast_score = 0.65  # Inverted (poor has more noise) = suspicious
        else:
            std_contrast_score = 0.8

        # Autocorrelation contrast:
        # real noise is more random (lower autocorrelation),
        # AI noise more structured (higher autocorrelation).
        autocorr_diff = abs(rich_autocorr - poor_autocorr)

        # Real images: different autocorrelation in rich vs poor areas.
        # AI images: similar autocorrelation everywhere.
        if autocorr_diff > 0.1:
            autocorr_score = 0.25  # High difference = likely real
        elif autocorr_diff > 0.05:
            autocorr_score = 0.4
        else:
            autocorr_score = 0.7  # Low difference = suspicious

        # === Step 5: Check the absolute noise levels ===
        # AI images often have very low noise overall
        avg_noise = (rich_std + poor_std) / 2
        if avg_noise < 2.0:
            noise_level_score = 0.8  # Very smooth = suspicious
        elif avg_noise < 4.0:
            noise_level_score = 0.5
        else:
            noise_level_score = 0.25  # Normal noise = likely real

        # === Combine the scores ===
        score = (0.40 * std_contrast_score +
                 0.30 * autocorr_score +
                 0.30 * noise_level_score)

        return float(np.clip(score, 0, 1))

    def _compression_analysis(self, image_path: str) -> float:
        """
        Detect compression inconsistencies from splicing.
        """
        img = cv2.imread(image_path)

        # Convert to YCrCb and analyze DCT blocks
        ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
        y_channel = ycrcb[:, :, 0].astype(np.float32)

        # Analyze 8x8 block boundaries (JPEG artifacts)
        h, w = y_channel.shape
        h8, w8 = (h // 8) * 8, (w // 8) * 8
        if h8 < 16 or w8 < 16:
            return 0.5

        y_cropped = y_channel[:h8, :w8]

        # Calculate the block boundary differences
        boundary_diffs = []
        inside_diffs = []

        for i in range(0, h8 - 8, 8):
            for j in range(0, w8 - 8, 8):
                # Horizontal difference across the block boundary vs inside the block
                boundary_diffs.append(abs(float(y_cropped[i + 7, j + 4]) - float(y_cropped[i + 8, j + 4])))
                inside_diffs.append(abs(float(y_cropped[i + 3, j + 4]) - float(y_cropped[i + 4, j + 4])))

        if not boundary_diffs or not inside_diffs:
            return 0.5

        # Compare boundary vs inside differences
        boundary_mean = np.mean(boundary_diffs)
        inside_mean = np.mean(inside_diffs)

        # Ratio of boundary to inside differences
        if inside_mean > 0:
            ratio = boundary_mean / inside_mean
            # Values far from 1.0 suggest compression inconsistencies
            inconsistency_score = np.clip(abs(ratio - 1.0) * 2, 0, 1)
        else:
            inconsistency_score = 0.5

        # Check the variance of the block differences
        diff_variance = np.std(boundary_diffs) / (np.mean(boundary_diffs) + 1e-10)
        variance_score = np.clip(diff_variance, 0, 1)

        score = 0.5 * inconsistency_score + 0.5 * variance_score

        return float(np.clip(score, 0, 1))

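    # Note on the ratio above: JPEG's 8x8 blocks leave a characteristic
    # relationship between boundary and within-block pixel differences;
    # splicing, inpainting, or resampling parts of an image disturbs that
    # relationship, which is why a ratio far from 1.0 and a high variance of
    # the boundary differences are both treated as suspicious here.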
    def _edge_coherence(self, img: np.ndarray) -> float:
        """
        Check edge coherence - AI images often have inconsistent edges.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Detect edges using Canny
        edges = cv2.Canny(gray, 50, 150)

        # Calculate the edge density
        edge_density = np.mean(edges > 0)

        # Very low or very high edge density is suspicious
        if edge_density < 0.02:
            density_score = 0.7  # Too few edges - over-smoothed
        elif edge_density > 0.25:
            density_score = 0.6  # Too many edges - over-sharpened
        else:
            density_score = 0.3  # Normal range

        # Check edge continuity using Hough lines
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)

        if lines is not None and len(lines) > 0:
            # Calculate line statistics
            line_lengths = [np.sqrt((l[0][2] - l[0][0]) ** 2 + (l[0][3] - l[0][1]) ** 2) for l in lines]
            avg_length = np.mean(line_lengths)

            # Very uniform line lengths can indicate artificial generation
            length_variance = np.std(line_lengths) / (avg_length + 1e-10)
            continuity_score = 1 - np.clip(length_variance, 0, 1)
        else:
            continuity_score = 0.5

        score = 0.5 * density_score + 0.5 * continuity_score

        return float(np.clip(score, 0, 1))

    def _sharpness_analysis(self, img: np.ndarray) -> float:
        """
        Detect oversharpening and overblurring artifacts.
        Uses Laplacian variance and the morphological gradient.

        Based on empirical analysis:
        - Real photos: lap_var=400-1500, grad_mean=13-25
        - Blur/smooth: lap_var=9-14, grad_mean=7-11
        - Oversharp: lap_var=2500-12000+, grad_mean=30-75
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Laplacian variance - measures sharpness
        laplacian = cv2.Laplacian(gray, cv2.CV_64F)
        lap_var = laplacian.var()

        # Score based on the Laplacian variance
        if lap_var > 3500:
            sharpness_score = 0.95  # Very oversharpened
        elif lap_var > 2200:
            sharpness_score = 0.80  # Oversharpened
        elif lap_var > 1600:
            sharpness_score = 0.45  # Upper normal range
        elif lap_var < 30:
            sharpness_score = 0.75  # Very blurry (heavily processed)
        elif lap_var < 100:
            sharpness_score = 0.55  # Blurry
        else:
            sharpness_score = 0.20  # Normal range (300-1600)

        # Morphological gradient - detects halos from oversharpening
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        gradient = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, kernel)
        grad_mean = np.mean(gradient)

        # Gradient-based score
        if grad_mean > 35:
            halo_score = 0.90  # Strong oversharpening halos
        elif grad_mean > 27:
            halo_score = 0.70  # Moderate oversharpening
        elif grad_mean < 12:
            halo_score = 0.60  # Too smooth (blur artifacts)
        else:
            halo_score = 0.25  # Normal range

        score = 0.55 * sharpness_score + 0.45 * halo_score

        return float(np.clip(score, 0, 1))

    def _color_channel_analysis(self, img: np.ndarray) -> float:
        """
        Color Channel Consistency Analysis (Research Method 3).

        AI-generated images often have:
        - unnatural color channel correlations,
        - inconsistent noise across the R, G, B channels,
        - unusual saturation patterns.

        Real cameras have consistent color processing pipelines.
        """
        # Split into color channels
        b, g, r = cv2.split(img)

        # === 1. Cross-channel correlation ===
        # Real images: R, G, B channels are highly correlated.
        # AI images: sometimes show unusual decorrelation.
        def safe_corrcoef(a, b):
            a_flat = a.flatten().astype(np.float64)
            b_flat = b.flatten().astype(np.float64)
            if np.std(a_flat) < 1e-10 or np.std(b_flat) < 1e-10:
                return 0.5
            corr = np.corrcoef(a_flat, b_flat)[0, 1]
            return corr if not np.isnan(corr) else 0.5

        rg_corr = safe_corrcoef(r, g)
        rb_corr = safe_corrcoef(r, b)
        gb_corr = safe_corrcoef(g, b)

        avg_corr = (rg_corr + rb_corr + gb_corr) / 3

        # Very low correlation is suspicious (unusual for natural images);
        # very high correlation may indicate grayscale converted to RGB.
        if avg_corr < 0.7:
            corr_score = 0.7  # Low correlation - suspicious
        elif avg_corr > 0.98:
            corr_score = 0.6  # Too high - might be fake grayscale
        else:
            corr_score = 0.25  # Normal range

        # === 2. Channel noise consistency ===
        # Extract the noise from each channel
        def get_noise_std(channel):
            blurred = cv2.GaussianBlur(channel, (5, 5), 0)
            noise = channel.astype(np.float32) - blurred.astype(np.float32)
            return np.std(noise)

        r_noise = get_noise_std(r)
        g_noise = get_noise_std(g)
        b_noise = get_noise_std(b)

        # Real cameras: similar noise across channels (sensor noise).
        # AI: can have very different noise in different channels.
        noise_std = np.std([r_noise, g_noise, b_noise])
        noise_mean = np.mean([r_noise, g_noise, b_noise])

        noise_cv = noise_std / (noise_mean + 1e-10)  # Coefficient of variation

        if noise_cv > 0.3:
            noise_score = 0.75  # High variation - suspicious
        elif noise_cv > 0.15:
            noise_score = 0.5
        else:
            noise_score = 0.25  # Consistent noise - likely real

        # === 3. Saturation analysis ===
        # AI images sometimes have unnatural saturation patterns
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        saturation = hsv[:, :, 1]

        sat_mean = np.mean(saturation)
        sat_std = np.std(saturation)

        # Very low saturation variance can indicate AI smoothing
        if sat_std < 30:
            sat_score = 0.65  # Low variance - suspicious
        elif sat_mean > 200:
            sat_score = 0.6  # Over-saturated
        else:
            sat_score = 0.3  # Normal

        # Combine the scores
        score = 0.35 * corr_score + 0.35 * noise_score + 0.30 * sat_score

        return float(np.clip(score, 0, 1))

    def _local_binary_pattern_analysis(self, img: np.ndarray) -> float:
        """
        Local Binary Pattern (LBP) Analysis (Research Method 4).

        LBP captures micro-texture patterns:
        - for each pixel, compare it with its 8 neighbors,
        - build a binary code from the comparisons,
        - the histogram of codes reveals texture characteristics.

        AI images have different LBP distributions than real photos.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        h, w = gray.shape

        # Simple LBP implementation (8 neighbors, radius 1)
        def compute_lbp(img):
            img_h, img_w = img.shape
            lbp = np.zeros_like(img, dtype=np.uint8)

            for i in range(1, img_h - 1):
                for j in range(1, img_w - 1):
                    center = img[i, j]
                    code = 0

                    # 8 neighbors in clockwise order
                    code |= (1 << 7) if img[i-1, j-1] >= center else 0
                    code |= (1 << 6) if img[i-1, j] >= center else 0
                    code |= (1 << 5) if img[i-1, j+1] >= center else 0
                    code |= (1 << 4) if img[i, j+1] >= center else 0
                    code |= (1 << 3) if img[i+1, j+1] >= center else 0
                    code |= (1 << 2) if img[i+1, j] >= center else 0
                    code |= (1 << 1) if img[i+1, j-1] >= center else 0
                    code |= (1 << 0) if img[i, j-1] >= center else 0

                    lbp[i, j] = code

            return lbp

        # For efficiency, sample a subset of the image
        sample_size = min(200, h - 2, w - 2)  # Leave a margin for the LBP
        if sample_size < 10:
            return 0.5  # Image too small
        start_h = (h - sample_size) // 2
        start_w = (w - sample_size) // 2
        sample = gray[start_h:start_h+sample_size, start_w:start_w+sample_size]

        lbp = compute_lbp(sample)

        # Compute the histogram
        hist, _ = np.histogram(lbp.flatten(), bins=256, range=(0, 256))
        hist = hist.astype(np.float32) / (hist.sum() + 1e-10)

        # === Analysis of the LBP histogram ===

        # 1. Uniformity: AI images often have less uniform LBP patterns.
        # "Uniform" LBP patterns have at most 2 bitwise transitions.
        uniform_patterns = [0, 1, 2, 3, 4, 6, 7, 8, 12, 14, 15, 16, 24, 28, 30, 31,
                            32, 48, 56, 60, 62, 63, 64, 96, 112, 120, 124, 126, 127,
                            128, 129, 131, 135, 143, 159, 191, 192, 193, 195, 199,
                            207, 223, 224, 225, 227, 231, 239, 240, 241, 243, 247,
                            248, 249, 251, 252, 253, 254, 255]

        uniform_ratio = sum(hist[p] for p in uniform_patterns if p < len(hist))

        # Real images typically have 85-95% uniform patterns;
        # AI may show different ratios.
        if uniform_ratio < 0.7:
            uniform_score = 0.75  # Low uniformity - suspicious
        elif uniform_ratio > 0.95:
            uniform_score = 0.6  # Too uniform - suspicious
        else:
            uniform_score = 0.25  # Normal

        # 2. Entropy of the LBP histogram:
        # AI images may have lower entropy (more predictable patterns).
        entropy = -np.sum(hist * np.log2(hist + 1e-10))
        max_entropy = np.log2(256)
        norm_entropy = entropy / max_entropy

        if norm_entropy < 0.6:
            entropy_score = 0.7  # Low entropy - suspicious
        elif norm_entropy > 0.9:
            entropy_score = 0.5  # Very high entropy
        else:
            entropy_score = 0.3  # Normal

        # 3. Peak analysis:
        # AI may show unusual peaks in the histogram.
        max_bin = np.max(hist)
        if max_bin > 0.1:
            peak_score = 0.65  # Dominant pattern - suspicious
        else:
            peak_score = 0.3

        score = 0.40 * uniform_score + 0.35 * entropy_score + 0.25 * peak_score

        return float(np.clip(score, 0, 1))

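    # LBP example (illustrative): for a center pixel of value 100 whose
    # clockwise neighbors starting at the top-left are
    # [120, 90, 95, 130, 100, 80, 85, 110], the >= comparisons give the bits
    # 1,0,0,1,1,0,0,1, i.e. the code 0b10011001 = 153.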
    def _glcm_texture_analysis(self, img: np.ndarray) -> float:
        """
        Grey Level Co-occurrence Matrix (GLCM) Analysis (Research Method 5).

        GLCM captures texture by analyzing how often pairs of pixel values
        occur at specific spatial relationships.

        Features: contrast, correlation, energy, homogeneity.
        AI images often have different GLCM statistics than real photos.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        h, w = gray.shape

        # Quantize to fewer grey levels for efficiency
        levels = 32
        gray_quantized = (gray // (256 // levels)).astype(np.uint8)

        # Sample a centered region for efficiency
        sample_size = min(200, h - 1, w - 1)
        if sample_size < 10:
            return 0.5  # Image too small
        start_h = (h - sample_size) // 2
        start_w = (w - sample_size) // 2
        sample = gray_quantized[start_h:start_h+sample_size, start_w:start_w+sample_size]

        # Compute the GLCM for distance=1, angle=0 (horizontal neighbors)
        glcm = np.zeros((levels, levels), dtype=np.float32)
        for i in range(sample.shape[0]):
            for j in range(sample.shape[1] - 1):
                glcm[sample[i, j], sample[i, j+1]] += 1

        # Normalize to a joint probability distribution
        glcm = glcm / (glcm.sum() + 1e-10)

        # === GLCM Features ===

        # Index grids for the feature calculations
        i_idx, j_idx = np.ogrid[:levels, :levels]

        # 1. Contrast: measures local grey-level variation
        contrast = np.sum(glcm * (i_idx - j_idx) ** 2)

        # 2. Homogeneity: measures closeness of the distribution to the diagonal
        homogeneity = np.sum(glcm / (1 + np.abs(i_idx - j_idx)))

        # 3. Energy (Angular Second Moment): measures uniformity
        energy = np.sum(glcm ** 2)

        # 4. Correlation: measures linear dependency between neighboring pixels
        #    (currently computed but not included in the aggregate score below)
        mean_i = np.sum(i_idx * glcm)
        mean_j = np.sum(j_idx * glcm)
        std_i = np.sqrt(np.sum(glcm * (i_idx - mean_i) ** 2))
        std_j = np.sqrt(np.sum(glcm * (j_idx - mean_j) ** 2))

        if std_i > 1e-10 and std_j > 1e-10:
            correlation = np.sum(glcm * (i_idx - mean_i) * (j_idx - mean_j)) / (std_i * std_j)
        else:
            correlation = 0

        # === Scoring based on typical values ===
        # AI images often have:
        #   - lower contrast (smoother)
        #   - higher homogeneity (more uniform)
        #   - higher energy (more predictable patterns)

        if contrast < 50:
            contrast_score = 0.7  # Very low contrast - suspicious
        elif contrast < 150:
            contrast_score = 0.5
        else:
            contrast_score = 0.25  # Normal contrast

        if homogeneity > 0.8:
            homog_score = 0.7  # Very homogeneous - suspicious
        elif homogeneity > 0.6:
            homog_score = 0.45
        else:
            homog_score = 0.25

        if energy > 0.1:
            energy_score = 0.7  # High energy - suspicious
        elif energy > 0.05:
            energy_score = 0.45
        else:
            energy_score = 0.25

        score = 0.35 * contrast_score + 0.35 * homog_score + 0.30 * energy_score

        return float(np.clip(score, 0, 1))
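
The LBP and GLCM loops above can be cross-checked against scikit-image, which ships reference implementations of both descriptors. A minimal sketch, assuming scikit-image is installed (it is not in the project's stated dependencies, and "sample.jpg" is a hypothetical path); skimage's LBP bit ordering differs from the hand-rolled loop, and skimage's "energy" is the square root of the ASM the detector computes, so "ASM" is requested instead:

# Cross-check sketch using scikit-image (assumed installed; not a project
# dependency). Exact LBP codes differ from the hand-rolled loop because
# skimage uses its own neighbor/bit ordering, but the histogram statistics
# (uniformity, entropy, peaks) are comparable.
import cv2
import numpy as np
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops

img = cv2.imread("sample.jpg")  # hypothetical input path
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# LBP with 8 neighbors at radius 1; method="default" yields raw 0-255 codes
lbp = local_binary_pattern(gray, P=8, R=1, method="default")
hist, _ = np.histogram(lbp.ravel(), bins=256, range=(0, 256), density=True)

# GLCM at distance 1, angle 0, quantized to 32 grey levels like the detector
quant = (gray // 8).astype(np.uint8)
glcm = graycomatrix(quant, distances=[1], angles=[0], levels=32, normed=True)
for prop in ("contrast", "homogeneity", "ASM", "correlation"):
    # "ASM" corresponds to the detector's "energy" (sum of squared entries)
    print(prop, float(graycoprops(glcm, prop)[0, 0]))
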
src/fusion/__init__.py ADDED
File without changes

src/fusion/combiner.py ADDED
@@ -0,0 +1,251 @@
"""
Fusion Module: combines forensic and VLM results.
"""

import math
from typing import Dict


class FusionModule:
    """Combines pixel-level forensics with semantic VLM analysis."""

    def __init__(self):
        # Weights for combining scores.
        # When the VLM is uncertain (0.5), we rely more on forensics.
        self.forensic_weight = 0.55
        self.vlm_weight = 0.45

    def combine(self, forensic_results: Dict, vlm_results: Dict) -> Dict:
        """
        Combine forensic and VLM results into a final prediction.

        Args:
            forensic_results: Output from ForensicDetector
            vlm_results: Output from VLMReasoner

        Returns:
            Final prediction dict with score, type, and reasoning
        """

        # Get the forensic score (already 0-1)
        forensic_score = forensic_results.get("aggregate_score", 0.5)

        # Convert the VLM result to a numeric score
        vlm_score = self._vlm_to_score(vlm_results)

        # Check for strong sharpness anomalies (oversharpening/blur)
        sharpness_score = forensic_results.get("sharpness_score", 0)
        noise_score = forensic_results.get("noise_score", 0)
        strong_sharpness_anomaly = sharpness_score > 0.65
        strong_noise_anomaly = noise_score > 0.65

        # Adaptive weighting: if the VLM is uncertain, rely more on forensics
        vlm_confidence = vlm_results.get("confidence", "low")
        is_vlm_uncertain = vlm_results.get("manipulation_detected", "uncertain") == "uncertain"

        # Override: trust forensics when strong pixel-level anomalies are
        # detected; the VLM often misses sharpness/noise artifacts that
        # forensics catches.
        if strong_sharpness_anomaly or strong_noise_anomaly:
            f_weight = 0.80
            v_weight = 0.20
        elif is_vlm_uncertain or vlm_confidence == "low":
            # VLM is uncertain - rely primarily on forensics
            f_weight = 0.85
            v_weight = 0.15
        elif vlm_confidence == "medium":
            f_weight = self.forensic_weight
            v_weight = self.vlm_weight
        else:  # high-confidence VLM
            f_weight = 0.40
            v_weight = 0.60

        # Weighted combination
        raw_score = f_weight * forensic_score + v_weight * vlm_score

        # Boost the score when forensics detect strong sharpness artifacts;
        # the VLM cannot reliably detect oversharpening/blur. Require BOTH
        # high sharpness AND an elevated aggregate forensic score to avoid
        # false positives.
        if sharpness_score > 0.70 and forensic_score > 0.45:
            raw_score = max(raw_score, 0.50 + (sharpness_score - 0.70) * 0.5)

        # Dampen false positives: when forensics are low/moderate but the VLM
        # says manipulated, the VLM may be making a semantic interpretation
        # error (e.g., dramatic skies).
        if forensic_score < 0.45 and vlm_score > 0.6:
            # Forensics get the final say when the pixel level is clean
            raw_score = min(raw_score, 0.42)

        # Calibration: stretch scores with a sigmoid-like transformation to
        # improve separation (pushes low scores lower, high scores higher).

        # Calibration center - tuned for balanced accuracy.
        # Real avg=0.446, Fake avg=0.503 on the ai_generated_v2 dataset.
        if is_vlm_uncertain:
            center = 0.45  # Balance between FP and FN
            steepness = 5.0
        else:
            center = 0.42  # Normal threshold with VLM
            steepness = 6.0

        normalized = (raw_score - center) * steepness
        final_score = 1 / (1 + math.exp(-normalized))

        # Determine the manipulation type
        manipulation_type = self._determine_type(forensic_results, vlm_results, final_score)

        # Generate combined reasoning
        reasoning = self._generate_reasoning(forensic_results, vlm_results)

        return {
            "score": round(final_score, 3),
            "manipulation_type": manipulation_type,
            "reasoning": reasoning,
            "forensic_score": round(forensic_score, 3),
            "vlm_score": round(vlm_score, 3)
        }

    def _vlm_to_score(self, vlm_results: Dict) -> float:
        """Convert the VLM's categorical output to a numeric score."""

        base_score = 0.5  # Uncertain default

        detection = vlm_results.get("manipulation_detected", "uncertain")
        confidence = vlm_results.get("confidence", "low")

        # Base score from the detection verdict
        if detection == "yes":
            base_score = 0.8
        elif detection == "no":
            base_score = 0.2

        # Adjust by confidence
        confidence_multiplier = {"high": 1.0, "medium": 0.7, "low": 0.4}
        multiplier = confidence_multiplier.get(confidence, 0.5)

        # Move the score toward the extremes based on confidence
        if detection == "yes":
            score = 0.5 + (base_score - 0.5) * multiplier
        elif detection == "no":
            score = 0.5 - (0.5 - base_score) * multiplier
        else:
            score = 0.5

        return score

    def _determine_type(self, forensic: Dict, vlm: Dict, final_score: float) -> str:
        """Determine the most likely manipulation type."""

        # If the score is low, the image is likely authentic
        if final_score < 0.48:
            return "authentic"

        # Use the VLM's type if it is confident and specific
        vlm_type = vlm.get("manipulation_type", "unknown")
        vlm_confidence = vlm.get("confidence", "low")
        if vlm_type and vlm_type not in ["unknown", "authentic", "manipulation_detected"] and vlm_confidence != "low":
            return vlm_type

        # Otherwise infer from forensic signals
        sharpness_score = forensic.get("sharpness_score", 0)
        texture_score = forensic.get("texture_score", 0)
        noise_score = forensic.get("noise_score", 0)
        compression_score = forensic.get("compression_score", 0)
        edge_score = forensic.get("edge_score", 0)

        # High noise uniformity suggests AI generation
        if noise_score > 0.65:
            return "full_synthesis"

        # High sharpness with noise suggests enhancement/filtering
        if sharpness_score > 0.65 and noise_score > 0.4:
            return "filter"

        # Very smooth textures suggest virtual staging
        if texture_score > 0.45:
            return "virtual_staging"

        # Strong compression differences suggest splicing/inpainting
        if compression_score > 0.72:
            return "inpainting"

        # Edge issues may also indicate inpainting
        if edge_score > 0.5:
            return "inpainting"

        # Default for high scores
        if final_score > 0.55:
            return "manipulation_detected"

        return "authentic"

    def _generate_reasoning(self, forensic: Dict, vlm: Dict) -> str:
        """Generate human-readable reasoning from the forensic and VLM analyses."""

        reasons = []
        agg_score = forensic.get("aggregate_score", 0.5)

        # VLM reasoning (if available and not mock)
        vlm_reasoning = vlm.get("reasoning", "")
        if vlm_reasoning and "unavailable" not in vlm_reasoning.lower() and "Visual analysis completed" not in vlm_reasoning:
            reasons.append(f"VLM observations: {vlm_reasoning}")

        # Detailed forensic insights based on research
        forensic_insights = []

        # Sharpness analysis (strongest discriminator)
        sharpness = forensic.get("sharpness_score", 0)
        if sharpness > 0.7:
            forensic_insights.append("significant oversharpening artifacts detected, common in AI enhancement")
        elif sharpness > 0.55:
            forensic_insights.append("moderate sharpness anomalies suggest post-processing")

        # Noise analysis (AI images have different noise patterns)
        noise = forensic.get("noise_score", 0)
        if noise > 0.7:
            forensic_insights.append("uniform noise patterns indicate AI-generated content")
        elif noise > 0.5:
            forensic_insights.append("noise distribution shows artificial smoothing")

        # Compression analysis
        compression = forensic.get("compression_score", 0)
        if compression > 0.75:
            forensic_insights.append("compression artifacts suggest digital manipulation")
        elif compression > 0.6:
            forensic_insights.append("minor compression inconsistencies noted")

        # Texture analysis
        texture = forensic.get("texture_score", 0)
        if texture > 0.5:
            forensic_insights.append("unnaturally smooth textures on walls or surfaces")
        elif texture > 0.35:
            forensic_insights.append("subtle texture smoothing detected")

        # Edge coherence
        edge = forensic.get("edge_score", 0)
        if edge > 0.5:
            forensic_insights.append("edge boundary anomalies around objects")

        # Build the final reasoning
        if forensic_insights:
            # Take the two most significant findings
            top_insights = forensic_insights[:2]
            reasons.append("Forensic analysis detected: " + "; ".join(top_insights) + ".")

        # Generate an appropriate conclusion if there are no specific insights
        if not reasons:
            if agg_score < 0.38:
                return "Image appears authentic with natural lighting, consistent shadows, and realistic textures throughout."
            elif agg_score < 0.48:
                return "Image shows minor processing artifacts but overall appears to be an authentic photograph."
            elif agg_score < 0.55:
                return "Image has borderline characteristics that warrant closer inspection for potential manipulation."
            else:
                return "Multiple forensic signals indicate potential AI manipulation or heavy post-processing."

        # Combine reasoning (max 2 sentences for the competition format);
        # the pieces keep their trailing periods, so join with a space to
        # avoid doubled periods.
        combined = " ".join(reasons)
        sentences = combined.replace(". ", ".|").split("|")
        result = " ".join(s.strip() for s in sentences[:2] if s.strip())
        if result and not result.endswith("."):
            result += "."
        return result
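
A minimal usage sketch of the fusion step, with invented scores chosen so the adaptive weighting, the sharpness boost, and the sigmoid calibration all fire (the dict keys mirror what combine() reads; the numbers are illustrations only):

# Minimal usage sketch for FusionModule; the input dicts mimic the keys the
# module reads, and the scores below are invented for illustration only.
from src.fusion.combiner import FusionModule

fusion = FusionModule()

forensic = {"aggregate_score": 0.62, "sharpness_score": 0.72, "noise_score": 0.40}
vlm = {"manipulation_detected": "uncertain", "confidence": "low", "reasoning": ""}

result = fusion.combine(forensic, vlm)
# With an uncertain VLM the forensic weight rises to 0.85, the sharpness
# boost fires (sharpness > 0.70 and forensics > 0.45), and the sigmoid with
# center 0.45 / steepness 5.0 stretches the raw score away from the middle.
print(result["score"], result["manipulation_type"], result["reasoning"])
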
src/neural/__init__.py ADDED
@@ -0,0 +1,3 @@
"""Neural network-based AI image detection."""

from .detector import NeuralDetector, DINOv2Detector
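
Downstream scripts can then pull both detectors from the package in one line; a usage sketch (note that detector.py imports torch at module scope, so this import already requires torch to be installed):

# Usage sketch: both detectors are re-exported at the package level.
from src.neural import NeuralDetector, DINOv2Detector

neural = NeuralDetector(use_clip=True, use_ai_detector=True)
dino = DINOv2Detector()
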
src/neural/detector.py ADDED
@@ -0,0 +1,375 @@
"""
Neural Network-based AI Image Detector
Uses pre-trained models from HuggingFace for detecting AI-generated images.

Based on research recommendations:
- DINOv2/CLIP for feature extraction
- Pre-trained deepfake detectors
- Ensemble approach for robustness
"""

import os
from typing import Dict

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image

# Lazy module-level caches so models are loaded once, on first use
_clip_model = None
_clip_processor = None
_ai_detector = None
_ai_detector_processor = None


def get_device():
    """Get the best available device."""
    if torch.cuda.is_available():
        return torch.device("cuda")
    elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")


class NeuralDetector:
    """
    Neural network-based detector using pre-trained models.

    Uses:
    1. CLIP for zero-shot AI image detection
    2. A pre-trained AI image detector from HuggingFace
    3. An ensemble of both for robust predictions
    """

    def __init__(self, use_clip: bool = True, use_ai_detector: bool = True):
        """
        Initialize the neural detector.

        Args:
            use_clip: Whether to use CLIP for zero-shot detection
            use_ai_detector: Whether to use the pre-trained AI detector
        """
        self.device = get_device()
        self.use_clip = use_clip
        self.use_ai_detector = use_ai_detector

        # Models are loaded lazily on first use
        self._clip_loaded = False
        self._detector_loaded = False

    def _load_clip(self):
        """Load the CLIP model for zero-shot classification."""
        if self._clip_loaded:
            return

        global _clip_model, _clip_processor

        if _clip_model is None:
            from transformers import CLIPProcessor, CLIPModel
            print("Loading CLIP model...")
            _clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
            _clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
            _clip_model = _clip_model.to(self.device)
            _clip_model.eval()
            print("CLIP model loaded.")

        self._clip_loaded = True

    def _load_ai_detector(self):
        """Load a pre-trained AI image detector."""
        if self._detector_loaded:
            return

        global _ai_detector, _ai_detector_processor

        if _ai_detector is None:
            from transformers import AutoModelForImageClassification, AutoImageProcessor
            print("Loading AI image detector...")

            # Try different models in order of preference
            models_to_try = [
                "umm-maybe/AI-image-detector",  # General AI detector
                "Organika/sdxl-detector",       # SDXL-specific
            ]

            for model_name in models_to_try:
                try:
                    _ai_detector = AutoModelForImageClassification.from_pretrained(model_name)
                    _ai_detector_processor = AutoImageProcessor.from_pretrained(model_name)
                    _ai_detector = _ai_detector.to(self.device)
                    _ai_detector.eval()
                    print(f"Loaded AI detector: {model_name}")
                    break
                except Exception as e:
                    print(f"Failed to load {model_name}: {e}")
                    continue

            if _ai_detector is None:
                print("Warning: No AI detector model available. Using CLIP only.")
                self.use_ai_detector = False

        self._detector_loaded = True

    def analyze_with_clip(self, image: Image.Image) -> Dict:
        """
        Use CLIP for zero-shot AI image detection.

        Research shows CLIP can detect AI images by comparing embeddings
        to text descriptions like "AI generated image" vs "real photograph".
        """
        self._load_clip()

        # Text prompts for classification.
        # Based on research: be specific about what we're looking for.
        text_prompts = [
            "a real photograph taken by a camera",
            "an AI generated image, synthetic, artificial, computer generated",
        ]

        inputs = _clip_processor(
            text=text_prompts,
            images=image,
            return_tensors="pt",
            padding=True
        )
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = _clip_model(**inputs)
            logits_per_image = outputs.logits_per_image
            probs = F.softmax(logits_per_image, dim=1)

        # probs[0] = real, probs[1] = AI
        probs = probs.cpu().numpy()[0]

        return {
            "clip_real_prob": float(probs[0]),
            "clip_fake_prob": float(probs[1]),
            "clip_score": float(probs[1]),  # Higher = more likely AI
        }

    def analyze_with_detector(self, image: Image.Image) -> Dict:
        """Use the pre-trained AI image detector."""
        self._load_ai_detector()

        if _ai_detector is None:
            return {"detector_score": 0.5, "detector_available": False}

        inputs = _ai_detector_processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = _ai_detector(**inputs)
            logits = outputs.logits
            probs = F.softmax(logits, dim=1)

        probs = probs.cpu().numpy()[0]

        # The model typically has labels like ['artificial', 'human'];
        # check the label order rather than assuming it.
        labels = _ai_detector.config.id2label

        # Find which index corresponds to AI/fake
        fake_idx = None
        for idx, label in labels.items():
            if any(kw in label.lower() for kw in ['artificial', 'ai', 'fake', 'synthetic', 'generated']):
                fake_idx = idx
                break

        if fake_idx is None:
            # Assume index 0 is AI (common convention)
            fake_idx = 0

        return {
            "detector_score": float(probs[fake_idx]),
            "detector_probs": {labels[i]: float(probs[i]) for i in range(len(probs))},
            "detector_available": True,
        }

    def analyze(self, image_path: str) -> Dict:
        """
        Analyze an image for AI generation.

        Args:
            image_path: Path to the image file

        Returns:
            Dict with detection results and an aggregate score
        """
        # Load the image
        image = Image.open(image_path).convert("RGB")

        results = {}
        scores = []
        weights = []

        # CLIP analysis
        if self.use_clip:
            try:
                clip_results = self.analyze_with_clip(image)
                results.update(clip_results)
                scores.append(clip_results["clip_score"])
                weights.append(0.4)  # CLIP weight
            except Exception as e:
                results["clip_error"] = str(e)

        # Pre-trained detector analysis
        if self.use_ai_detector:
            try:
                detector_results = self.analyze_with_detector(image)
                results.update(detector_results)
                if detector_results.get("detector_available", False):
                    scores.append(detector_results["detector_score"])
                    weights.append(0.6)  # Pre-trained detector weight (higher trust)
            except Exception as e:
                results["detector_error"] = str(e)

        # Compute the aggregate score as a weighted average
        if scores:
            total_weight = sum(weights)
            aggregate = sum(s * w for s, w in zip(scores, weights)) / total_weight
            results["neural_aggregate_score"] = float(aggregate)
        else:
            results["neural_aggregate_score"] = 0.5  # Neutral if no models worked

        return results


class DINOv2Detector:
    """
    DINOv2-based detector for AI image detection.

    Research shows DINOv2 features are highly discriminative for AI vs real
    images. This uses DINOv2 as a feature extractor with a simple heuristic
    head.

    Note: a proper classifier head would require training on labeled data,
    so we use the model in feature-extraction mode and combine the result
    with other signals.
    """

    def __init__(self):
        self.device = get_device()
        self.model = None
        self.processor = None

    def _load_model(self):
        if self.model is not None:
            return

        from transformers import AutoImageProcessor, AutoModel
        print("Loading DINOv2 model...")

        # Use the smaller variant for CPU
        model_name = "facebook/dinov2-small"

        self.processor = AutoImageProcessor.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.model = self.model.to(self.device)
        self.model.eval()
        print("DINOv2 model loaded.")

    def extract_features(self, image_path: str) -> np.ndarray:
        """Extract DINOv2 features from an image."""
        self._load_model()

        image = Image.open(image_path).convert("RGB")
        inputs = self.processor(images=image, return_tensors="pt")
        inputs = {k: v.to(self.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model(**inputs)
            # Use the CLS token as the image representation
            features = outputs.last_hidden_state[:, 0, :]

        return features.cpu().numpy()[0]

    def analyze(self, image_path: str) -> Dict:
        """
        Analyze an image using DINOv2 features.

        Since we don't have a trained classifier, we use statistical
        properties of the features that research shows differ between AI
        and real images.
        """
        features = self.extract_features(image_path)

        # Research insight: AI images tend to have more uniform feature
        # distributions; real images have more varied, scene-specific features.
        feature_std = np.std(features)
        feature_kurtosis = self._kurtosis(features)
        feature_entropy = self._entropy(features)

        # Normalize to 0-1 scores. Based on empirical observation, AI images
        # have lower std and lower kurtosis; these thresholds would need
        # calibration on actual data.
        std_score = 1 - np.clip(feature_std / 1.0, 0, 1)                # Lower std = more suspicious
        kurtosis_score = 1 - np.clip((feature_kurtosis + 2) / 6, 0, 1)  # Lower kurtosis = suspicious

        # Weighted combination
        dino_score = 0.6 * std_score + 0.4 * kurtosis_score

        return {
            "dino_feature_std": float(feature_std),
            "dino_feature_kurtosis": float(feature_kurtosis),
            "dino_feature_entropy": float(feature_entropy),
            "dino_score": float(np.clip(dino_score, 0, 1)),
        }

    def _kurtosis(self, x):
        """Compute the (excess, biased) kurtosis of an array."""
        n = len(x)
        mean = np.mean(x)
        std = np.std(x)
        if std == 0:
            return 0
        return np.sum(((x - mean) / std) ** 4) / n - 3

    def _entropy(self, x):
        """Compute the entropy of the feature distribution."""
        # Discretize the features into bins
        hist, _ = np.histogram(x, bins=50, density=True)
        hist = hist[hist > 0]
        return -np.sum(hist * np.log2(hist + 1e-10))


def test_neural_detector():
    """Test the neural detector on sample images."""
    import glob

    detector = NeuralDetector()

    # Find test images
    fake_images = glob.glob("/home/omer_aims_ac_za/digital-integrity-challenge/data/ai_generated_v2/*.png")[:5]
    real_images = glob.glob("/home/omer_aims_ac_za/digital-integrity-challenge/data/real/*.jpg")[:5]

    print("\n=== Testing on FAKE images ===")
    fake_scores = []
    for img_path in fake_images:
        results = detector.analyze(img_path)
        score = results.get("neural_aggregate_score", 0.5)
        fake_scores.append(score)
        print(f"{os.path.basename(img_path)}: {score:.3f}")

    print("\n=== Testing on REAL images ===")
    real_scores = []
    for img_path in real_images:
        results = detector.analyze(img_path)
        score = results.get("neural_aggregate_score", 0.5)
        real_scores.append(score)
        print(f"{os.path.basename(img_path)}: {score:.3f}")

    print("\n=== Summary ===")
    print(f"FAKE avg: {np.mean(fake_scores):.3f}")
    print(f"REAL avg: {np.mean(real_scores):.3f}")
    print(f"Separation: {np.mean(fake_scores) - np.mean(real_scores):.3f}")

    # A good detector should score FAKE above REAL
    accuracy = (sum(1 for s in fake_scores if s >= 0.5) +
                sum(1 for s in real_scores if s < 0.5)) / (len(fake_scores) + len(real_scores))
    print(f"Accuracy (threshold=0.5): {accuracy*100:.1f}%")


if __name__ == "__main__":
    test_neural_detector()
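
A quick usage sketch for both detectors; "room.jpg" is a hypothetical path, and scipy (not a stated project dependency) is used only to cross-check the hand-rolled kurtosis, which matches scipy's Fisher kurtosis with biased moment estimates:

# Usage sketch for the neural detectors; "room.jpg" is a hypothetical path.
import numpy as np
from scipy.stats import kurtosis  # scipy is an assumption, not a project dep

from src.neural.detector import NeuralDetector, DINOv2Detector

detector = NeuralDetector()
print(detector.analyze("room.jpg").get("neural_aggregate_score"))

dino = DINOv2Detector()
feats = dino.extract_features("room.jpg")
# _kurtosis above equals scipy's Fisher kurtosis with biased estimates
assert np.isclose(dino._kurtosis(feats), kurtosis(feats, fisher=True, bias=True))
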
src/vlm/__init__.py ADDED
File without changes

src/vlm/reasoner.py ADDED
@@ -0,0 +1,636 @@
"""
Module 2: VLM Logic Reasoner
Semantic-level analysis using Vision-Language Models.

Local models only (no API keys required for the competition).
TPU support via JAX for PaliGemma models.
Models are tried from largest/most capable to smallest.
"""

import os
import re
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
from typing import Dict, List, Optional
from pathlib import Path

# VLM inference timeout in seconds
VLM_TIMEOUT_SECONDS = 60

# ============================================================================
# RESEARCH-BASED PROMPT TEMPLATES
# ============================================================================

# Real-estate-specific prompt (optimized for smaller models)
REAL_ESTATE_PROMPT = """Analyze this real estate image for AI manipulation or virtual staging.

Check these red flags:
1. Do furniture shadows match light sources?
2. Are wall/floor textures unnaturally smooth?
3. Do reflections look consistent?
4. Are furniture edges blended naturally?
5. Is the scale/proportion realistic?

Respond in this format:
MANIPULATION_DETECTED: YES or NO or UNCERTAIN
CONFIDENCE: HIGH or MEDIUM or LOW
MANIPULATION_TYPE: authentic or virtual_staging or inpainting or full_synthesis
REASONING: One sentence explaining why."""

# Simple prompt for basic models
SIMPLE_PROMPT = """Is this real estate image real or AI-generated?
Check shadows, textures, and reflections.
Answer: REAL or FAKE, then explain briefly."""


class VLMReasoner:
    """Uses local VLMs to detect semantic anomalies. TPU-optimized."""

    # Model priority: largest/best first for better reasoning
    MODEL_PRIORITY = [
        "qwen2vl",    # Best: 72B/7B available
        "paligemma",  # Good: 28B/10B available
        "blip2",      # Fallback: 2.7B
        "mock",       # Last resort
    ]

    def __init__(self, backend: str = "auto", use_tpu: bool = True):
        """
        Initialize the VLM reasoner.

        Args:
            backend: Model to use ("auto", "blip2", "paligemma", "qwen2vl", "mock")
            use_tpu: Whether to use a TPU if available (for JAX models)
        """
        self.use_tpu = use_tpu
        self.backend = self._detect_backend(backend)
        self.model = None
        self.processor = None
        self.device = None
        self._init_backend()

    def _detect_backend(self, backend: str) -> str:
        """Detect the best available backend, most capable first."""
        if backend != "auto":
            return backend

        # Auto-detect: try models in MODEL_PRIORITY order (most capable first)
        for model in self.MODEL_PRIORITY:
            if model == "mock":
                return "mock"
            if self._check_model_available(model):
                return model

        return "mock"

    def _check_model_available(self, model: str) -> bool:
        """Check whether a model's dependencies are available."""
        try:
            if model == "blip2":
                from transformers import Blip2Processor
                return True
            elif model == "paligemma":
                # Check for JAX (TPU) or PyTorch
                try:
                    import jax
                    return True
                except Exception:
                    pass
                try:
                    from transformers import PaliGemmaForConditionalGeneration
                    return True
                except Exception:
                    pass
                return False
            elif model == "qwen2vl":
                from transformers import AutoProcessor
                return True
        except ImportError:
            return False
        return False

    def _init_backend(self):
        """Initialize the selected backend."""
        print(f"Initializing VLM backend: {self.backend}")

        try:
            if self.backend == "blip2":
                self._init_blip2()
            elif self.backend == "paligemma":
                self._init_paligemma()
            elif self.backend == "qwen2vl":
                self._init_qwen2vl()
            elif self.backend == "mock":
                print("Using mock VLM backend (forensics only)")
        except Exception as e:
            print(f"Failed to initialize {self.backend}: {e}")
            print("Falling back to next available backend...")
            self._fallback_init()

    def _fallback_init(self):
        """Try fallback backends in order."""
        for model in self.MODEL_PRIORITY:
            if model == self.backend:
                continue
            try:
                print(f"Trying fallback: {model}")
                self.backend = model
                if model == "blip2":
                    self._init_blip2()
                elif model == "paligemma":
                    self._init_paligemma()
                elif model == "qwen2vl":
                    self._init_qwen2vl()
                elif model == "mock":
                    return
                print(f"Fallback {model} initialized!")
                return
            except Exception as e:
                print(f"Fallback {model} failed: {e}")
                continue

        print("All backends failed. Using mock.")
        self.backend = "mock"

    def _get_device(self):
        """Detect the best available device."""
        import torch
        if torch.cuda.is_available():
            return "cuda"
        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
            return "mps"
        return "cpu"

    def _check_tpu_available(self) -> bool:
        """Check if a TPU is available via JAX."""
        if not self.use_tpu:
            return False
        try:
            import jax
            devices = jax.devices()
            return any("Tpu" in str(d) for d in devices)
        except Exception:
            return False

    def _init_blip2(self):
        """Initialize BLIP-2 (smallest option, ~5GB)."""
        from transformers import Blip2Processor, Blip2ForConditionalGeneration
        import torch

        model_id = "Salesforce/blip2-opt-2.7b"
        print(f"Loading {model_id}...")

        self.device = self._get_device()
        dtype = torch.float16 if self.device == "cuda" else torch.float32

        self.processor = Blip2Processor.from_pretrained(model_id)
        self.model = Blip2ForConditionalGeneration.from_pretrained(
            model_id,
            torch_dtype=dtype,
            device_map="auto" if self.device == "cuda" else None,
            low_cpu_mem_usage=True,
        )

        if self.device != "cuda":
            self.model = self.model.to(self.device)

        self.model.eval()
        print(f"BLIP-2 loaded on {self.device}!")

    def _init_paligemma(self):
        """Initialize PaliGemma with TPU support via JAX, or a PyTorch fallback."""
        if self._check_tpu_available():
            self._init_paligemma_jax()
        else:
            self._init_paligemma_torch()

    def _init_paligemma_jax(self):
        """Initialize PaliGemma using JAX for TPU."""
        print("Initializing PaliGemma with JAX/TPU...")

        try:
            import jax
            import jax.numpy as jnp
            from transformers import AutoProcessor
            from big_vision.models.proj.paligemma import paligemma
            from big_vision.trainers.proj.paligemma import predict_fns

            # Use the smallest PaliGemma model
            model_id = "google/paligemma-3b-pt-224"

            self.processor = AutoProcessor.from_pretrained(model_id)
            # JAX model loading would go here; only the processor is set up
            # so far, so this path is a stub pending big_vision wiring.

            print("PaliGemma JAX processor ready on TPU.")
            self.device = "tpu"

        except ImportError as e:
            print(f"JAX PaliGemma not available: {e}")
            print("Falling back to PyTorch PaliGemma...")
            self._init_paligemma_torch()

    def _init_paligemma_torch(self):
        """Initialize PaliGemma using PyTorch."""
        print("Initializing PaliGemma with PyTorch...")

        from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
        import torch

        self.device = self._get_device()

        # Prefer larger PaliGemma models (bigger = better reasoning)
        model_candidates = [
            "google/paligemma2-28b-pt-896",  # ~56GB, best
            "google/paligemma2-10b-pt-448",  # ~20GB, good balance
            "google/paligemma-3b-pt-448",    # ~6GB, fallback
            "google/paligemma-3b-pt-224",    # ~6GB, smallest
        ]

        dtype = torch.bfloat16 if self.device == "cuda" else torch.float32

        for model_id in model_candidates:
            try:
                print(f"Trying {model_id}...")
                self.processor = AutoProcessor.from_pretrained(model_id)
                self.model = PaliGemmaForConditionalGeneration.from_pretrained(
                    model_id,
                    torch_dtype=dtype,
                    device_map="auto" if self.device == "cuda" else None,
                    low_cpu_mem_usage=True,
                )

                if self.device not in ["cuda"]:
                    self.model = self.model.to(self.device)

                self.model.eval()
                print(f"PaliGemma loaded: {model_id} on {self.device}!")
                return
            except Exception as e:
                print(f"{model_id} failed: {e}")
                continue

        raise RuntimeError("Could not load any PaliGemma model")

    def _init_qwen2vl(self):
        """Initialize Qwen2-VL, preferring the largest variant that loads."""
        import torch

        try:
            from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
        except ImportError:
            from transformers import AutoModelForVision2Seq, AutoProcessor
            Qwen2VLForConditionalGeneration = AutoModelForVision2Seq

        self.device = self._get_device()

        # Prefer larger Qwen2-VL models (bigger = better reasoning)
        model_candidates = [
            "Qwen/Qwen2-VL-72B-Instruct",  # ~140GB, best quality
            "Qwen/Qwen2-VL-7B-Instruct",   # ~14GB, good balance
            "Qwen/Qwen2-VL-2B-Instruct",   # ~4GB, fallback
        ]

        dtype = torch.float16 if self.device == "cuda" else torch.float32

        for model_id in model_candidates:
            try:
                print(f"Trying {model_id}...")

                self.processor = AutoProcessor.from_pretrained(
                    model_id, trust_remote_code=True
                )

                self.model = Qwen2VLForConditionalGeneration.from_pretrained(
                    model_id,
                    torch_dtype=dtype,
                    device_map="auto" if self.device == "cuda" else None,
                    trust_remote_code=True,
                    low_cpu_mem_usage=True,
                )

                if self.device not in ["cuda"]:
                    self.model = self.model.to(self.device)

                self.model.eval()
                print(f"Qwen2-VL loaded: {model_id} on {self.device}!")
                return
            except Exception as e:
                print(f"{model_id} failed: {e}")
                continue

        raise RuntimeError("Could not load any Qwen2-VL model")

    def analyze(self, image_path: str) -> Dict:
        """Analyze an image for manipulation, with timeout protection."""
        if self.backend == "mock":
            return self._analyze_mock(image_path)

        def _run_analysis():
            if self.backend == "blip2":
                return self._analyze_blip2(image_path)
            elif self.backend == "paligemma":
                return self._analyze_paligemma(image_path)
            elif self.backend == "qwen2vl":
                return self._analyze_qwen2vl(image_path)
            else:
                return self._analyze_mock(image_path)

        try:
            with ThreadPoolExecutor(max_workers=1) as executor:
                future = executor.submit(_run_analysis)
                return future.result(timeout=VLM_TIMEOUT_SECONDS)
        except FuturesTimeoutError:
            print(f"VLM inference timed out after {VLM_TIMEOUT_SECONDS}s")
            return self._analyze_mock(image_path)
        except Exception as e:
            print(f"Analysis error: {e}")
            return self._analyze_mock(image_path)

    def _analyze_blip2(self, image_path: str) -> Dict:
        """Analyze using BLIP-2 with a multi-question approach."""
        from PIL import Image
        import torch

        image = Image.open(image_path).convert("RGB")

        # Questions aimed at explainability - describe what the VLM sees
        questions = [
            ("Question: Describe the lighting and shadows in this image. Answer:", "lighting"),
            ("Question: Describe the textures in this image. Answer:", "texture"),
        ]

        answers = []
        reasoning_parts = []

        for q, category in questions:
            try:
                inputs = self.processor(image, text=q, return_tensors="pt")
                if self.device:
                    inputs = {k: v.to(self.device) if hasattr(v, 'to') else v
                              for k, v in inputs.items()}

                with torch.no_grad():
                    generated_ids = self.model.generate(**inputs, max_new_tokens=20)

                answer = self.processor.batch_decode(
                    generated_ids, skip_special_tokens=True
                )[0].strip()

                # Extract just the answer part
                if "Answer:" in answer:
                    answer = answer.split("Answer:")[-1].strip()

                answers.append((category, answer.lower()))

                # Collect reasoning
                if len(answer) > 5:
                    reasoning_parts.append(f"{category}: {answer[:60]}")
            except Exception:
                continue

        return self._aggregate_blip2_responses(answers, reasoning_parts)

    def _aggregate_blip2_responses(self, qa_pairs: List, reasoning_parts: List) -> Dict:
        """Aggregate BLIP-2 responses - focused on explainability, not detection."""
        # BLIP-2 is used for EXPLAINABILITY (30% of the competition score);
        # detection is handled by forensics - the VLM provides reasoning.

        # Look for anomaly indicators in the descriptions
        anomaly_words = ["inconsistent", "unusual", "strange", "artificial",
                         "smooth", "unnatural", "blurry", "distorted"]
        normal_words = ["natural", "realistic", "consistent", "detailed",
                        "normal", "clear", "sharp"]

        anomaly_score = 0
        normal_score = 0

        for category, answer in qa_pairs:
            anomaly_score += sum(1 for w in anomaly_words if w in answer)
            normal_score += sum(1 for w in normal_words if w in answer)

        # Build descriptive reasoning from the VLM responses
        reasoning = ". ".join(reasoning_parts[:3]) if reasoning_parts else "Visual analysis completed."

        # Provide only a weak signal to fusion (forensics is the primary
        # detector): every branch deliberately returns "uncertain" so the
        # VLM observations can nudge the decision without overriding it.
        if anomaly_score > normal_score + 1:
            detection = "uncertain"  # Weak signal - let forensics decide
            confidence = "low"
        elif normal_score > anomaly_score + 1:
            detection = "uncertain"  # Weak signal - let forensics decide
            confidence = "low"
        else:
            detection = "uncertain"
            confidence = "low"

        return {
            "manipulation_detected": detection,
            "confidence": confidence,
            "manipulation_type": "unknown",
            "reasoning": reasoning[:200],
        }

    def _analyze_paligemma(self, image_path: str) -> Dict:
        """Analyze using PaliGemma."""
        from PIL import Image
        import torch

        image = Image.open(image_path).convert("RGB")

        # Multi-question approach
        questions = [
            ("Is this image real or AI-generated?", "main"),
            ("Are there shadow inconsistencies?", "shadow"),
            ("Are textures unnaturally smooth?", "texture"),
        ]

        answers = []
        for prompt, category in questions:
            try:
                inputs = self.processor(text=prompt, images=image, return_tensors="pt")
                inputs = {k: v.to(self.model.device) for k, v in inputs.items()}

                with torch.no_grad():
                    outputs = self.model.generate(**inputs, max_new_tokens=50)

                response = self.processor.decode(outputs[0], skip_special_tokens=True)
                answers.append((category, response.lower()))
            except Exception:
                continue

        return self._aggregate_qa_responses(answers)

    def _analyze_qwen2vl(self, image_path: str) -> Dict:
        """Analyze using Qwen2-VL."""
        from PIL import Image
        import torch

        image = Image.open(image_path).convert("RGB")

        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image", "image": image},
                    {"type": "text", "text": REAL_ESTATE_PROMPT}
                ]
            }
        ]

        text = self.processor.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        inputs = self.processor(
            text=[text], images=[image], return_tensors="pt", padding=True
        )

        if self.device:
            inputs = {k: v.to(self.device) if hasattr(v, 'to') else v
                      for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model.generate(**inputs, max_new_tokens=200)

        response = self.processor.batch_decode(outputs, skip_special_tokens=True)[0]

        if "assistant" in response.lower():
            response = response.split("assistant")[-1].strip()

        return self._parse_structured_response(response)

    def _analyze_mock(self, image_path: str) -> Dict:
        """Mock analysis for when no VLM is available."""
        return {
            "manipulation_detected": "uncertain",
            "confidence": "low",
            "manipulation_type": "unknown",
            "reasoning": "VLM backend unavailable - using forensic signals only."
        }

    def _aggregate_qa_responses(self, qa_pairs: List) -> Dict:
        """Aggregate multi-question responses into a final result."""
        fake_signals = ["generated", "fake", "artificial", "synthetic", "manipulated",
                        "artifacts", "unnatural", "inconsistent", "smooth", "yes"]
        real_signals = ["real", "natural", "authentic", "consistent", "genuine",
                        "photograph", "no", "match", "normal"]

        fake_score = 0
        real_score = 0
        staging_detected = False
        reasoning_parts = []

        for category, answer in qa_pairs:
            answer_lower = answer.lower()

            fake_in = sum(1 for s in fake_signals if s in answer_lower)
            real_in = sum(1 for s in real_signals if s in answer_lower)

            # Weight the main question more heavily
            weight = 2 if category == "main" else 1
            fake_score += fake_in * weight
            real_score += real_in * weight

            # Only set when a question tagged "staging" is asked
            if category == "staging" and fake_in > 0:
                staging_detected = True

            if category in ["shadow", "texture"] and len(answer) > 10:
                reasoning_parts.append(answer[:60])

        # Determine the verdict
        if fake_score > real_score + 2:
            detection = "yes"
            confidence = "high" if fake_score > 5 else "medium"
        elif real_score > fake_score + 2:
            detection = "no"
            confidence = "high" if real_score > 5 else "medium"
        else:
            detection = "uncertain"
            confidence = "low"

        # Determine the type
        if staging_detected:
            manip_type = "virtual_staging"
        elif detection == "yes":
            manip_type = "manipulation_detected"
        else:
            manip_type = "authentic"

        reasoning = " ".join(reasoning_parts)[:200] or "Visual analysis completed."

        return {
            "manipulation_detected": detection,
            "confidence": confidence,
            "manipulation_type": manip_type,
            "reasoning": reasoning,
        }

    def _parse_structured_response(self, response: str) -> Dict:
        """Parse a structured VLM response."""
        result = {
            "manipulation_detected": "uncertain",
            "confidence": "low",
            "manipulation_type": "unknown",
            "reasoning": ""
        }

        lines = response.split('\n')

        # Parse MANIPULATION_DETECTED / VERDICT
        for line in lines:
            line_upper = line.upper()
            if 'MANIPULATION_DETECTED:' in line_upper or 'VERDICT:' in line_upper:
                if 'YES' in line_upper or 'FAKE' in line_upper:
                    result["manipulation_detected"] = "yes"
                elif 'NO' in line_upper or 'REAL' in line_upper:
                    result["manipulation_detected"] = "no"
|
| 586 |
+
break
|
| 587 |
+
|
| 588 |
+
# Fallback keyword detection
|
| 589 |
+
if result["manipulation_detected"] == "uncertain":
|
| 590 |
+
text_lower = response.lower()
|
| 591 |
+
fake_kw = ["manipulated", "fake", "generated", "synthetic", "staged"]
|
| 592 |
+
real_kw = ["authentic", "genuine", "real photograph", "not manipulated"]
|
| 593 |
+
|
| 594 |
+
if any(kw in text_lower for kw in fake_kw):
|
| 595 |
+
result["manipulation_detected"] = "yes"
|
| 596 |
+
elif any(kw in text_lower for kw in real_kw):
|
| 597 |
+
result["manipulation_detected"] = "no"
|
| 598 |
+
|
| 599 |
+
# Parse CONFIDENCE
|
| 600 |
+
for line in lines:
|
| 601 |
+
if 'CONFIDENCE:' in line.upper():
|
| 602 |
+
if 'HIGH' in line.upper():
|
| 603 |
+
result["confidence"] = "high"
|
| 604 |
+
elif 'MEDIUM' in line.upper():
|
| 605 |
+
result["confidence"] = "medium"
|
| 606 |
+
break
|
| 607 |
+
|
| 608 |
+
# Parse TYPE
|
| 609 |
+
for line in lines:
|
| 610 |
+
if 'MANIPULATION_TYPE:' in line.upper() or 'TYPE:' in line.upper():
|
| 611 |
+
type_val = line.split(':', 1)[-1].strip().lower().replace(" ", "_")
|
| 612 |
+
if type_val in ["authentic", "virtual_staging", "inpainting", "full_synthesis"]:
|
| 613 |
+
result["manipulation_type"] = type_val
|
| 614 |
+
break
|
| 615 |
+
|
| 616 |
+
if result["manipulation_type"] == "unknown":
|
| 617 |
+
result["manipulation_type"] = (
|
| 618 |
+
"manipulation_detected" if result["manipulation_detected"] == "yes"
|
| 619 |
+
else "authentic"
|
| 620 |
+
)
|
| 621 |
+
|
| 622 |
+
# Parse REASONING
|
| 623 |
+
for line in lines:
|
| 624 |
+
if line.upper().startswith('REASONING:') or line.upper().startswith('REASON:'):
|
| 625 |
+
result["reasoning"] = line.split(':', 1)[-1].strip()
|
| 626 |
+
break
|
| 627 |
+
|
| 628 |
+
if not result["reasoning"]:
|
| 629 |
+
# Extract evidence sentences
|
| 630 |
+
sentences = re.split(r'[.!?]', response)
|
| 631 |
+
evidence = [s.strip() for s in sentences
|
| 632 |
+
if any(kw in s.lower() for kw in
|
| 633 |
+
["shadow", "light", "texture", "reflect", "artifact"])]
|
| 634 |
+
result["reasoning"] = ". ".join(evidence[:2])[:200] or "Analysis completed."
|
| 635 |
+
|
| 636 |
+
return result
|
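For reference, a minimal, illustrative sketch of the structured response that `_parse_structured_response` expects. The field names come from the parser above; the sample text and the `VLMReasoner()` instantiation are assumptions (the class constructor is defined earlier in the file):

# Illustrative only: a VLM response shaped like this parses cleanly.
sample = (
    "MANIPULATION_DETECTED: yes\n"
    "CONFIDENCE: medium\n"
    "MANIPULATION_TYPE: virtual_staging\n"
    "REASONING: Furniture shadows do not match the window light direction."
)
# Assuming the enclosing class can be instantiated as VLMReasoner():
# parsed = VLMReasoner()._parse_structured_response(sample)
# -> {"manipulation_detected": "yes", "confidence": "medium",
#     "manipulation_type": "virtual_staging",
#     "reasoning": "Furniture shadows do not match the window light direction."}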
test_ensemble.py
ADDED
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
"""Test ensemble of CLIP + Forensics."""

import sys
sys.path.insert(0, '.')

from pathlib import Path
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel
from src.forensics.detector import ForensicDetector

REAL_DIR = Path("data/real")
FAKE_DIR = Path("data/ai_generated_v2")

def load_images(directory, pattern="*"):
    images = []
    extensions = {'.jpg', '.jpeg', '.png', '.webp'}
    for ext in extensions:
        for f in directory.glob(f"{pattern}{ext}"):
            try:
                img = Image.open(f).convert("RGB")
                images.append((f.name, f, img))
            except Exception:
                pass  # skip unreadable files
    return images

def main():
    print("Loading models...")

    # CLIP
    model = CLIPModel.from_pretrained("laion/CLIP-ViT-L-14-laion2B-s32B-b82K")
    processor = CLIPProcessor.from_pretrained("laion/CLIP-ViT-L-14-laion2B-s32B-b82K")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    model.eval()

    labels = [
        "a real photograph",
        "an AI-generated image",
        "a computer-generated image",
        "a synthetic image created by artificial intelligence"
    ]

    # Forensics
    forensic = ForensicDetector()

    # Load images
    real_estate = load_images(REAL_DIR)
    fake_v2 = load_images(FAKE_DIR, "*_fake_*")
    real_v2 = load_images(FAKE_DIR, "*_real_*")

    all_real = real_estate + real_v2
    all_fake = fake_v2

    print(f"Testing {len(all_real)} real, {len(all_fake)} fake images")

    results = []

    for label, images, is_fake in [("REAL", all_real, False), ("FAKE", all_fake, True)]:
        print(f"\n=== {label} ===")
        for name, path, img in images:
            # CLIP score: normalized probability mass on the AI-style captions
            inputs = processor(text=labels, images=img, return_tensors="pt", padding=True).to(device)
            with torch.no_grad():
                outputs = model(**inputs)
            probs = outputs.logits_per_image.softmax(dim=1).cpu().numpy()[0]

            real_prob = probs[0]
            ai_prob = max(probs[1], probs[2], probs[3])
            clip_score = ai_prob / (real_prob + ai_prob + 1e-10)

            # Forensic score
            forensic_results = forensic.analyze(str(path))
            forensic_score = forensic_results['aggregate_score']

            # One record per image; ensemble scores are derived in the summary
            results.append({
                'name': name,
                'is_fake': is_fake,
                'clip': clip_score,
                'forensic': forensic_score,
            })

            print(f"{name}: CLIP={clip_score:.3f}, Forensic={forensic_score:.3f}")

    # Calculate accuracies
    print("\n" + "="*60)
    print("ACCURACY SUMMARY")
    print("="*60)

    for method in ['clip', 'forensic', 'ensemble_0.7', 'ensemble_0.8', 'ensemble_0.9']:
        real_correct = 0
        real_total = 0
        fake_correct = 0
        fake_total = 0

        for r in results:
            if method.startswith('ensemble'):
                # Weighted combination: w * CLIP + (1 - w) * forensics
                w = float(method.split('_')[1])
                score = w * r['clip'] + (1 - w) * r['forensic']
            else:
                score = r[method]

            if r['is_fake']:
                fake_total += 1
                if score >= 0.5:
                    fake_correct += 1
            else:
                real_total += 1
                if score < 0.5:
                    real_correct += 1

        total = real_total + fake_total
        overall = (real_correct + fake_correct) / total * 100 if total > 0 else 0
        print(f"{method:20s}: Real {real_correct}/{real_total}, Fake {fake_correct}/{fake_total}, Overall {overall:.1f}%")

if __name__ == "__main__":
    main()
test_forensics.py
ADDED
@@ -0,0 +1,25 @@
#!/usr/bin/env python3
"""Quick test of forensic module."""

import sys
sys.path.insert(0, '.')

from src.forensics.detector import ForensicDetector

def test_with_image(image_path):
    print(f"Testing with: {image_path}")
    detector = ForensicDetector()
    results = detector.analyze(image_path)

    print("\nForensic Analysis Results:")
    for key, value in results.items():
        if isinstance(value, float):
            print(f"  {key}: {value:.3f}")
        else:
            print(f"  {key}: {value}")

    return results

if __name__ == "__main__":
    if len(sys.argv) > 1:
        test_with_image(sys.argv[1])
    else:
        print("Usage: python test_forensics.py <image_path>")
        print("\nTo test, download a sample image first")
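The same helper can also be driven programmatically for quick experiments. A minimal sketch (the image path is illustrative; `aggregate_score` is the key the ensemble script reads from the forensic results):

# Assumes an image exists at this illustrative path.
from test_forensics import test_with_image
results = test_with_image("data/real/sample_01.jpg")
print(results.get("aggregate_score"))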
test_pretrained_detectors.py
ADDED
@@ -0,0 +1,302 @@
#!/usr/bin/env python3
"""
Test pre-trained AI image detectors on Flux-generated images.
No fine-tuning - just evaluation of existing models.
"""

import json
import time
from pathlib import Path
from PIL import Image
import torch
from transformers import pipeline
import numpy as np
from tqdm import tqdm

# Paths
REAL_DIR = Path("data/real")
FAKE_DIR = Path("data/ai_generated_v2")

# Models to test
MODELS = [
    # Current baseline
    "umm-maybe/AI-image-detector",
    # SDXL-specific detector (Swin Transformer)
    "Organika/sdxl-detector",
    # Fine-tuned on 2024 generators including Flux
    "Smogy/SMOGY-Ai-images-detector",
]

def load_images(directory, limit=None):
    """Load images from directory."""
    images = []
    extensions = {'.jpg', '.jpeg', '.png', '.webp'}
    files = sorted([f for f in directory.iterdir() if f.suffix.lower() in extensions])
    if limit:
        files = files[:limit]
    for f in files:
        try:
            img = Image.open(f).convert("RGB")
            images.append((f.name, img))
        except Exception as e:
            print(f"Error loading {f}: {e}")
    return images

def ai_score_from_preds(preds):
    """Map a classifier's label/score pairs to a single 'AI-generated' score.

    Different models use different label names, so match on keywords.
    """
    for p in preds:
        label = p['label'].lower()
        if 'artificial' in label or 'ai' in label or 'fake' in label:
            return p['score']
        if 'human' in label or 'real' in label:
            return 1.0 - p['score']
    return 0.0

def test_detector(model_name, real_images, fake_images):
    """Test a single detector model."""
    print(f"\n{'='*60}")
    print(f"Testing: {model_name}")
    print('='*60)

    try:
        # Load model
        start = time.time()
        classifier = pipeline(
            "image-classification",
            model=model_name,
            device=0 if torch.cuda.is_available() else -1
        )
        load_time = time.time() - start
        print(f"Model loaded in {load_time:.1f}s")

        results = {"real": [], "fake": [], "model": model_name}

        # Test real images (should receive a low AI score)
        print(f"\nTesting {len(real_images)} real images...")
        correct_real = 0
        for name, img in tqdm(real_images):
            try:
                pred = classifier(img)
                score = ai_score_from_preds(pred)
                is_correct = score < 0.5
                correct_real += is_correct
                results["real"].append({
                    "name": name,
                    "ai_score": score,
                    "correct": is_correct,
                    "raw": pred
                })
            except Exception as e:
                print(f"Error on {name}: {e}")
                results["real"].append({"name": name, "error": str(e)})

        # Test fake images (should receive a high AI score)
        print(f"Testing {len(fake_images)} fake (AI-generated) images...")
        correct_fake = 0
        for name, img in tqdm(fake_images):
            try:
                pred = classifier(img)
                score = ai_score_from_preds(pred)
                is_correct = score >= 0.5
                correct_fake += is_correct
                results["fake"].append({
                    "name": name,
                    "ai_score": score,
                    "correct": is_correct,
                    "raw": pred
                })
            except Exception as e:
                print(f"Error on {name}: {e}")
                results["fake"].append({"name": name, "error": str(e)})

        # Calculate metrics
        total_real = len([r for r in results["real"] if "error" not in r])
        total_fake = len([r for r in results["fake"] if "error" not in r])

        real_acc = correct_real / total_real * 100 if total_real > 0 else 0
        fake_acc = correct_fake / total_fake * 100 if total_fake > 0 else 0
        overall_acc = (correct_real + correct_fake) / (total_real + total_fake) * 100 if (total_real + total_fake) > 0 else 0

        print(f"\nResults for {model_name}:")
        print(f"  Real images: {correct_real}/{total_real} ({real_acc:.1f}%)")
        print(f"  Fake images: {correct_fake}/{total_fake} ({fake_acc:.1f}%)")
        print(f"  Overall: {overall_acc:.1f}%")

        results["metrics"] = {
            "real_accuracy": real_acc,
            "fake_accuracy": fake_acc,
            "overall_accuracy": overall_acc,
            "correct_real": correct_real,
            "correct_fake": correct_fake,
            "total_real": total_real,
            "total_fake": total_fake
        }

        # Clean up
        del classifier
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return results

    except Exception as e:
        print(f"Failed to load/run model: {e}")
        import traceback
        traceback.print_exc()
        return {"model": model_name, "error": str(e)}

def test_clip_zero_shot():
    """Test CLIP ViT-L with zero-shot classification."""
    from transformers import CLIPProcessor, CLIPModel

    print(f"\n{'='*60}")
    print("Testing: CLIP ViT-L Zero-Shot (laion/CLIP-ViT-L-14-laion2B-s32B-b82K)")
    print('='*60)

    try:
        model = CLIPModel.from_pretrained("laion/CLIP-ViT-L-14-laion2B-s32B-b82K")
        processor = CLIPProcessor.from_pretrained("laion/CLIP-ViT-L-14-laion2B-s32B-b82K")

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = model.to(device)
        model.eval()

        # Zero-shot labels
        labels = [
            "a real photograph",
            "an AI-generated image",
            "a computer-generated image",
            "a synthetic image created by artificial intelligence"
        ]

        real_images = load_images(REAL_DIR)
        fake_images = load_images(FAKE_DIR)

        results = {"real": [], "fake": [], "model": "CLIP-ViT-L Zero-Shot"}
        correct_real = 0
        correct_fake = 0

        print(f"\nTesting {len(real_images)} real images...")
        for name, img in tqdm(real_images):
            inputs = processor(text=labels, images=img, return_tensors="pt", padding=True).to(device)
            with torch.no_grad():
                outputs = model(**inputs)
            logits = outputs.logits_per_image
            probs = logits.softmax(dim=1).cpu().numpy()[0]

            # Real photo is label 0, AI labels are 1, 2, 3
            real_prob = probs[0]
            ai_prob = max(probs[1], probs[2], probs[3])
            is_correct = real_prob > ai_prob
            correct_real += is_correct
            results["real"].append({"name": name, "real_prob": float(real_prob), "ai_prob": float(ai_prob), "correct": is_correct})

        print(f"Testing {len(fake_images)} fake images...")
        for name, img in tqdm(fake_images):
            inputs = processor(text=labels, images=img, return_tensors="pt", padding=True).to(device)
            with torch.no_grad():
                outputs = model(**inputs)
            logits = outputs.logits_per_image
            probs = logits.softmax(dim=1).cpu().numpy()[0]

            real_prob = probs[0]
            ai_prob = max(probs[1], probs[2], probs[3])
            is_correct = ai_prob > real_prob
            correct_fake += is_correct
            results["fake"].append({"name": name, "real_prob": float(real_prob), "ai_prob": float(ai_prob), "correct": is_correct})

        total_real = len(real_images)
        total_fake = len(fake_images)
        real_acc = correct_real / total_real * 100 if total_real > 0 else 0
        fake_acc = correct_fake / total_fake * 100 if total_fake > 0 else 0
        overall_acc = (correct_real + correct_fake) / (total_real + total_fake) * 100

        print("\nResults for CLIP ViT-L Zero-Shot:")
        print(f"  Real images: {correct_real}/{total_real} ({real_acc:.1f}%)")
        print(f"  Fake images: {correct_fake}/{total_fake} ({fake_acc:.1f}%)")
        print(f"  Overall: {overall_acc:.1f}%")

        results["metrics"] = {
            "real_accuracy": real_acc,
            "fake_accuracy": fake_acc,
            "overall_accuracy": overall_acc
        }

        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return results

    except Exception as e:
        print(f"Failed: {e}")
        import traceback
        traceback.print_exc()
        return {"model": "CLIP-ViT-L Zero-Shot", "error": str(e)}

def main():
    print("Pre-trained AI Image Detector Evaluation")
    print(f"Real images: {REAL_DIR}")
    print(f"Fake images: {FAKE_DIR}")
    print(f"Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

    # Load all images once
    real_images = load_images(REAL_DIR)
    fake_images = load_images(FAKE_DIR)
    print(f"\nLoaded {len(real_images)} real, {len(fake_images)} fake images")

    all_results = []

    # Test each model
    for model_name in MODELS:
        result = test_detector(model_name, real_images, fake_images)
        all_results.append(result)

    # Test CLIP zero-shot
    clip_result = test_clip_zero_shot()
    all_results.append(clip_result)

    # Summary
    print("\n" + "="*60)
    print("SUMMARY - All Models")
    print("="*60)
    print(f"{'Model':<45} {'Real%':>8} {'Fake%':>8} {'Overall':>8}")
    print("-"*70)

    for r in all_results:
        if "error" in r:
            print(f"{r['model']:<45} {'ERROR':>8}")
        else:
            m = r.get("metrics", {})
            print(f"{r['model']:<45} {m.get('real_accuracy', 0):>7.1f}% {m.get('fake_accuracy', 0):>7.1f}% {m.get('overall_accuracy', 0):>7.1f}%")

    # Save results
    with open("detector_comparison.json", "w") as f:
        # Convert non-serializable items (numpy scalars/arrays) to plain Python
        def serialize(obj):
            if isinstance(obj, (np.floating, np.integer)):
                return float(obj)
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            return str(obj)
        json.dump(all_results, f, indent=2, default=serialize)

    print("\nResults saved to detector_comparison.json")

    # Find best model
    best = max([r for r in all_results if "error" not in r],
               key=lambda x: x.get("metrics", {}).get("overall_accuracy", 0))
    print(f"\nBest model: {best['model']} ({best.get('metrics', {}).get('overall_accuracy', 0):.1f}% accuracy)")

if __name__ == "__main__":
    main()
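One caveat on the label matching in `ai_score_from_preds`: substring checks are deliberately loose ("ai" also matches inside words such as "paint"), which is fine for the three models listed but could misfire on new label sets. A stricter token-based variant, as a sketch (the token sets are assumptions, not an exhaustive vocabulary):

# Sketch: token-based label matching to avoid accidental substring hits.
def ai_score_strict(preds):
    ai_tokens = {"ai", "artificial", "fake", "generated", "synthetic"}
    real_tokens = {"human", "real", "photo", "photograph", "authentic"}
    for p in preds:
        tokens = set(p["label"].lower().replace("-", " ").replace("_", " ").split())
        if tokens & ai_tokens:
            return p["score"]
        if tokens & real_tokens:
            return 1.0 - p["score"]
    return 0.0  # no recognizable label: treat as "not AI"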