"""
branches/diffusion_branch.py
-----------------------------
Branch 5: Diffusion Residual Analysis Branch
STATUS: COMPLETE - no training required (signal processing)
Detects denoising traces and residual noise patterns left by diffusion models
(e.g. Stable Diffusion, DALL-E, Midjourney).
Technique:
- Residual noise map : image - gaussian_blur(image)
- High-pass filtering : captures fine-grained noise structure
- Noise statistics : kurtosis, variance, power spectral density
- Local noise uniformity : AI images have suspiciously uniform noise
Research background:
Corvi et al. (2023) demonstrated that diffusion models leave detectable
denoising artifacts in the residual noise domain. Unlike real cameras
(photon shot noise, sensor noise), diffusion residuals show elevated
uniformity and dampened high-frequency kurtosis.
Output:
{
"prob_fake" : float in [0, 1],
"confidence" : float in [0, 1],
"noise_map" : np.ndarray (H, W) - residual noise for visualization
}
"""
import numpy as np
import cv2
from scipy.stats import kurtosis, skew
from scipy.signal import welch
from utils.image_utils import to_grayscale
# -----------------------------------------------------------------
# Internal Helpers
# -----------------------------------------------------------------
def _compute_residual_noise(gray: np.ndarray, sigma: int = 3) -> np.ndarray:
    """
    Build the residual noise map: the image minus its Gaussian-blurred copy.

    The input is clipped to [0, 1], scaled by 255 and cast to uint8 before
    blurring, so the residual is expressed in 8-bit intensity units.

    Args:
        gray  : grayscale image array; values outside [0, 1] are clipped.
        sigma : Gaussian standard deviation passed to the blur. The square
                kernel has side 6 * sigma + 1.

    Returns:
        float32 array holding (quantized image - blurred image); entries
        can be negative.
    """
    quantized = (np.clip(gray, 0, 1) * 255).astype(np.uint8)

    # Odd kernel side that spans roughly three sigmas on either side.
    side = 6 * sigma + 1
    smoothed = cv2.GaussianBlur(quantized, (side, side), sigma)

    # Subtract in float32 so negative differences do not wrap around in uint8.
    return np.subtract(quantized.astype(np.float32), smoothed.astype(np.float32))
def _noise_kurtosis_score(residual: np.ndarray) -> float:
    """
    Score how far the residual's kurtosis departs from the camera-noise band.

    Uses the Pearson definition (fisher=False), under which a Gaussian has
    kurtosis 3. The heuristic treats 2.5 <= kurtosis <= 6.0 as consistent
    with real camera noise and penalises values outside that band:

        kurtosis < 2.5  ->  (2.5 - kurtosis) / 2.5    (flatter than Gaussian)
        kurtosis > 6.0  ->  (kurtosis - 6.0) / 10.0   (heavy-tailed / spiky)

    Args:
        residual : residual noise array of any shape; it is flattened.

    Returns:
        Score in [0, 1]; higher means more suspicious. Returns the neutral
        value 0.5 when kurtosis is undefined (empty input, constant input,
        or a non-finite result), matching the fallback used by the other
        scorers in this module.
    """
    flat = np.asarray(residual, dtype=np.float64).ravel()

    # Kurtosis divides by variance squared, so it is undefined for constant
    # or empty data. Bail out explicitly rather than depending on what the
    # installed SciPy version returns for that case.
    if flat.size == 0 or flat.min() == flat.max():
        return 0.5

    kurt = float(kurtosis(flat, fisher=False))  # Pearson: Gaussian == 3

    # NaN would fail both comparisons below and silently score 0.0.
    if not np.isfinite(kurt):
        return 0.5

    if kurt < 2.5:
        score = (2.5 - kurt) / 2.5
    elif kurt > 6.0:
        score = (kurt - 6.0) / 10.0
    else:
        score = 0.0
    return float(np.clip(score, 0.0, 1.0))
def _noise_variance_score(residual: np.ndarray) -> float:
    """
    Score the overall variance of the residual against a plausible band.

    Variance between 1.5 and 30.0 (inclusive) scores 0. Below 1.5 the score
    rises linearly towards 1 as variance approaches 0 (heuristic: overly
    aggressive denoising). Above 30.0 it rises linearly and saturates at 1
    once variance reaches 80 (heuristic: injected noise).

    Args:
        residual : residual noise array.

    Returns:
        Score in [0, 1]; higher means more suspicious.
    """
    spread = float(np.var(residual))

    if spread < 1.5:
        raw = (1.5 - spread) / 1.5
    elif spread > 30.0:
        raw = (spread - 30.0) / 50.0
    else:
        # Inside the band considered typical for real images.
        return 0.0

    return float(np.clip(raw, 0.0, 1.0))
def _noise_uniformity_score(residual: np.ndarray, patch_size: int = 32) -> float:
    """
    Score how spatially uniform the local noise variance is.

    The residual is cut into non-overlapping patch_size x patch_size tiles
    (partial tiles at the right/bottom edges are ignored). The variance of
    each tile is computed, and the coefficient of variation (std / mean) of
    those variances is mapped to a score:

        score = clip((0.5 - cv) / 0.5, 0, 1)

    Heuristic: camera noise varies by region (cv of roughly 0.5 or more),
    whereas diffusion output tends to have uniform noise (low cv).

    Args:
        residual   : 2-D residual noise array of shape (H, W).
        patch_size : side of the square tiles in pixels (default 32).

    Returns:
        Score in [0, 1]; higher = more uniform = more likely fake.
        Returns the neutral value 0.5 when fewer than 4 full tiles fit.

    Raises:
        ValueError: if patch_size is smaller than 1.
    """
    if patch_size < 1:
        raise ValueError("patch_size must be a positive integer")

    height, width = residual.shape

    # Count every tile that fits completely. Floor division includes the
    # last tile when a dimension is an exact multiple of patch_size.
    rows = height // patch_size
    cols = width // patch_size
    if rows * cols < 4:
        return 0.5

    # View the cropped residual as a (rows, patch, cols, patch) grid so the
    # per-tile variance is a single reduction over the two patch axes.
    cropped = residual[:rows * patch_size, :cols * patch_size]
    tiles = cropped.reshape(rows, patch_size, cols, patch_size)
    local_vars = tiles.var(axis=(1, 3), dtype=np.float64)

    # The small epsilon keeps the ratio finite when every tile is flat.
    cv_of_var = float(np.std(local_vars) / (np.mean(local_vars) + 1e-8))
    return float(np.clip((0.5 - cv_of_var) / 0.5, 0.0, 1.0))
def _high_pass_psd_score(residual: np.ndarray) -> float:
    """
    Score the balance of high- versus low-frequency power in the residual.

    The residual is flattened to a 1-D float64 signal and its power spectral
    density is estimated with Welch's method (nperseg=256). The mean power
    of the top quarter of frequency bins is divided by the mean power of the
    bottom quarter, and the ratio is scored:

        ratio < 0.10  ->  (0.10 - ratio) / 0.10   (high frequencies suppressed)
        ratio > 0.60  ->  (ratio - 0.60) / 0.40   (high frequencies amplified)
        otherwise     ->  0

    Args:
        residual : residual noise array.

    Returns:
        Score in [0, 1]; higher means more suspicious. Returns the neutral
        value 0.5 when the spectrum has fewer than 10 bins or the
        low-frequency power is below 1e-8.
    """
    signal = residual.ravel().astype(np.float64)
    _, psd = welch(signal, nperseg=256)

    bins = len(psd)
    if bins < 10:
        return 0.5

    # Compare the lowest quarter of the spectrum with the highest quarter.
    low_band = float(np.mean(psd[:bins // 4]))
    high_band = float(np.mean(psd[3 * bins // 4:]))
    if low_band < 1e-8:
        return 0.5

    hf_ratio = high_band / (low_band + 1e-8)

    if hf_ratio < 0.10:
        raw = (0.10 - hf_ratio) / 0.10
    elif hf_ratio > 0.60:
        raw = (hf_ratio - 0.60) / 0.40
    else:
        return 0.0

    return float(np.clip(raw, 0.0, 1.0))
# -----------------------------------------------------------------
# Public API
# -----------------------------------------------------------------
def run_diffusion_branch(img: np.ndarray) -> dict:
    """
    Run the complete Diffusion Residual Analysis Branch.

    The image is converted to grayscale, residual noise maps are computed at
    two blur scales (sigma=1 and sigma=3), four heuristic scores are derived
    from them, and the scores are blended with fixed weights.

    Args:
        img : float32 numpy array (H, W, 3) in [0, 1] - RGB image

    Returns:
        dict with keys:
            "prob_fake"  : float - weighted score clipped to [0, 1]
                           (kurtosis 0.30, variance 0.20, uniformity 0.35,
                           PSD 0.15)
            "confidence" : float - (1 - std of the four scores) * 0.88,
                           clipped to [0.1, 0.90]
            "noise_map"  : np.ndarray float32 - absolute fine-scale residual,
                           divided by its maximum when that maximum is > 0
    """
    gray = to_grayscale(img)  # expected to be (H, W) float32 in [0, 1]

    # The fine residual feeds the three statistical scores; the coarse
    # residual feeds the spectral score.
    fine = _compute_residual_noise(gray, sigma=1)
    coarse = _compute_residual_noise(gray, sigma=3)

    signals = [
        _noise_kurtosis_score(fine),
        _noise_variance_score(fine),
        _noise_uniformity_score(fine),
        _high_pass_psd_score(coarse),
    ]
    kurt_s, var_s, unif_s, psd_s = signals

    # Fixed-weight blend; the weights sum to 1.
    blended = (
        0.30 * kurt_s +
        0.20 * var_s +
        0.35 * unif_s +
        0.15 * psd_s
    )
    prob_fake = float(np.clip(blended, 0.0, 1.0))

    # Low spread among the four scores is read as agreement, hence higher
    # confidence.
    spread = float(np.std(signals))
    confidence = float(np.clip((1.0 - spread) * 0.88, 0.1, 0.90))

    # Visualisation map: magnitude of the fine residual scaled to [0, 1].
    magnitude = np.abs(fine)
    peak = magnitude.max()
    if peak > 0:
        magnitude = magnitude / peak

    return {
        "prob_fake": prob_fake,
        "confidence": confidence,
        "noise_map": magnitude.astype(np.float32),
    }
|