Spaces:
Sleeping
Sleeping
| """ | |
| models/frequency_detector.py | |
| Frequency domain analysis for detecting AI-generated / manipulated images. | |
| AI-generated images consistently exhibit unnatural patterns in the DCT/FFT | |
| frequency domain compared to real photographs: | |
| - Unusual high-frequency energy distribution | |
| - Periodic artifacts in the spectrum | |
| - Lack of natural 1/f pink-noise roll-off | |
| This module analyses both the DCT (block-based, like JPEG compression) and | |
| FFT (global frequency spectrum) properties of the input image. | |
| """ | |
| import numpy as np | |
| from PIL import Image | |
| from scipy.fft import fft2, fftshift, dct | |
| from typing import Dict | |
| import cv2 | |
| import io | |
| import sys | |
| import os | |
# Make the project root importable regardless of how this module is loaded
# (as part of the package, as a script, or exec'd in an embedded context).
try:
    # Normal case: the project root is two directory levels above this file.
    _base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
except NameError:
    # __file__ is undefined when the source is exec'd / run interactively;
    # fall back to the current working directory.
    _base_dir = os.path.abspath(os.getcwd())
sys.path.append(_base_dir)
from image_authenticity import config
class FrequencyDetector:
    """
    Frequency-domain real/fake detector.

    Combines several classical forensic signals -- FFT spectral statistics,
    block-DCT statistics, Error Level Analysis (ELA), texture complexity,
    colour-saturation statistics, Benford's-Law digit statistics and
    high-frequency noise statistics -- into a single fake-probability score.
    """

    def __init__(self):
        # All tunables come from the project-level config module.
        self.image_size = config.FREQ_IMAGE_SIZE      # square analysis resolution
        self.patch_size = config.FREQ_DCT_PATCH_SIZE  # DCT block edge (JPEG-style)
        self.hf_thresh = config.FREQ_HIGH_FREQ_THRESH

    def _preprocess(self, image: "Image.Image") -> np.ndarray:
        """Convert a PIL image to a resized float32 grayscale array."""
        if image.mode != "RGB":
            image = image.convert("RGB")
        img = image.resize((self.image_size, self.image_size), Image.LANCZOS)
        return np.array(img.convert("L"), dtype=np.float32)

    # -- FFT analysis -------------------------------------------------------
    def _fft_analysis(self, gray: np.ndarray) -> Dict[str, float]:
        """
        Compute the FFT spectrum and extract frequency statistics.

        Real photos follow a natural 1/f^alpha power spectral density
        (alpha roughly 1.5-2.5).  AI images often deviate with excessive
        high-frequency energy or grid artifacts.

        The 1/f slope is fitted on the RAW power (|F|^2) radial profile:
        log-compressing the magnitude before the fit corrupts the alpha
        estimate (a previous version did exactly that, yielding ~0.02
        instead of ~1.5).

        Returns:
            dict with "spectral_alpha", "hf_ratio", "periodic_score".
        """
        f = fftshift(fft2(gray))
        # Raw power spectrum -- NOT log-compressed -- for a correct slope fit.
        power = np.abs(f) ** 2
        h, w = power.shape
        cy, cx = h // 2, w // 2
        Y, X = np.ogrid[:h, :w]
        R = np.sqrt((X - cx) ** 2 + (Y - cy) ** 2)
        max_r = np.sqrt(cx ** 2 + cy ** 2)
        # Radial power spectral density (exclude the DC bin at r = 0).
        radial_bins = 48
        bin_edges = np.linspace(1.0, max_r, radial_bins + 1)
        psd_raw = []
        freq_vals = []
        for i in range(radial_bins):
            mask = (R >= bin_edges[i]) & (R < bin_edges[i + 1])
            if mask.sum() > 0:
                psd_raw.append(power[mask].mean())
                # Normalised spatial frequency in [0, 1].
                freq_vals.append((bin_edges[i] + bin_edges[i + 1]) / 2.0 / max_r)
        psd_raw = np.array(psd_raw, dtype=np.float64)
        freq_vals = np.array(freq_vals, dtype=np.float64)
        # Fit log(power) = -alpha * log(freq) + c  ->  1/f^alpha model.
        log_freq = np.log(freq_vals + 1e-12)
        log_power = np.log(psd_raw + 1e-12)
        if len(log_freq) > 3 and log_power.std() > 0:
            alpha = float(-np.polyfit(log_freq, log_power, 1)[0])
        else:
            alpha = 1.8  # neutral fallback for degenerate spectra
        # Fraction of total binned power above the median-frequency bin.
        mid_point = len(psd_raw) // 2
        low_power = psd_raw[:mid_point].sum()
        high_power = psd_raw[mid_point:].sum()
        total_power = low_power + high_power + 1e-12
        hf_ratio = float(high_power / total_power)
        # Periodic-artifact score: deviation of the log-magnitude radial
        # profile from a smooth quadratic fit.  Log-compressed here on
        # purpose -- we look for periodic bumps, not absolute power.
        log_mag_psd = np.log1p(np.sqrt(psd_raw))
        idx = np.arange(len(log_mag_psd))
        fitted = np.poly1d(np.polyfit(idx, log_mag_psd, 2))(idx)
        residuals = np.abs(log_mag_psd - fitted)
        periodic_score = float(residuals.std() / (log_mag_psd.mean() + 1e-8))
        return {
            "spectral_alpha": alpha,
            "hf_ratio": hf_ratio,
            "periodic_score": periodic_score,
        }

    # -- DCT block analysis -------------------------------------------------
    def _iter_dct_blocks(self, gray: np.ndarray):
        """Yield the 2-D orthonormal DCT of each non-overlapping p x p block.

        The image is cropped to whole multiples of the patch size first.
        Shared by `_dct_analysis` and `_benford_analysis`.
        """
        p = self.patch_size
        h, w = gray.shape
        cropped = gray[:h - h % p, :w - w % p]
        for i in range(0, cropped.shape[0], p):
            for j in range(0, cropped.shape[1], p):
                block = cropped[i:i + p, j:j + p]
                yield dct(dct(block, axis=0, norm='ortho'), axis=1, norm='ortho')

    def _dct_analysis(self, gray: np.ndarray) -> Dict[str, float]:
        """
        Block DCT statistics (JPEG-style blocks of `self.patch_size`).

        AI images often show:
          - higher energy in the AC (non-DC) coefficients
          - an unnatural coefficient distribution across blocks
        """
        ac_energies = []
        dc_ac_ratios = []
        cross_block_vars = []
        for dct_block in self._iter_dct_blocks(gray):
            dc = float(dct_block[0, 0] ** 2)
            ac = float((dct_block ** 2).sum() - dc)
            ac_energies.append(ac)
            dc_ac_ratios.append(dc / (ac + 1e-8))
            # Spread of the "pure AC" quadrant (first row/column excluded).
            cross_block_vars.append(dct_block[1:, 1:].std())
        return {
            "dct_ac_mean": float(np.mean(ac_energies)),
            "dct_ac_std": float(np.std(ac_energies)),
            "dct_dc_ac_ratio": float(np.mean(dc_ac_ratios)),
            "dct_cross_block_var": float(np.mean(cross_block_vars)),
        }

    # -- ELA (Error Level Analysis) -----------------------------------------
    def _ela_analysis(self, image: "Image.Image") -> Dict[str, float]:
        """
        Error Level Analysis (ELA).

        Re-saves the image at JPEG quality 95 and measures the per-pixel
        residual.  Real camera JPEGs have non-uniform ELA (sky, skin and
        edges compress at different rates); AI images show unnaturally
        uniform ELA (diffusion) or abnormally high ELA (GAN overwriting).

        Format-aware weighting: images with no prior JPEG history (PNG
        files, numpy arrays from Gradio, freshly generated images) always
        have very low ELA.  That case is detected via ela_mean < 1.5 after
        the first recompression and the ELA weight is scaled down -- more
        robust than checking image.format, which is None for all
        numpy-array-sourced images from Gradio.
        """
        if image.mode != "RGB":
            image = image.convert("RGB")
        # Re-save at quality 95 into an in-memory buffer.
        buf = io.BytesIO()
        image.save(buf, format="JPEG", quality=95)
        buf.seek(0)
        recompressed = Image.open(buf).convert("RGB")
        orig = np.array(image, dtype=np.float32)
        comp = np.array(recompressed, dtype=np.float32)
        ela_map = np.abs(orig - comp)
        ela_mean = float(ela_map.mean())
        ela_std = float(ela_map.std())
        # Very low ela_mean (< 1.5) means no JPEG history; ELA is
        # unreliable there, so its weight is reduced to 0.25x.
        # Real camera JPEGs typically have ela_mean >= 2.0.
        ela_weight_scale = 0.25 if ela_mean < 1.5 else 1.0
        return {
            "ela_mean": ela_mean,
            "ela_std": ela_std,
            "ela_weight_scale": ela_weight_scale,
        }

    # -- Texture complexity analysis ----------------------------------------
    def _texture_analysis(self, gray: np.ndarray) -> Dict[str, float]:
        """
        Texture complexity via local vs global Laplacian-variance ratio.

        Real photos: locally varying texture (hair, grass, skin pores).
        AI images: globally smooth with occasional ultra-sharp edges.

        The image is split into non-overlapping 32x32 tiles and the
        Laplacian variance is computed per tile:
          - texture_local_mean : mean tile variance
          - texture_local_cv   : coefficient of variation of tile variances
                                 (low CV = unnaturally uniform texture = AI)
          - texture_global_lapv: whole-image Laplacian variance
        """
        gray_u8 = np.clip(gray, 0, 255).astype(np.uint8)
        global_lapv = float(cv2.Laplacian(gray_u8, cv2.CV_64F).var())
        tile = 32
        h, w = gray_u8.shape
        tile_vars = []
        for i in range(0, h - tile + 1, tile):
            for j in range(0, w - tile + 1, tile):
                patch = gray_u8[i:i + tile, j:j + tile]
                tile_vars.append(float(cv2.Laplacian(patch, cv2.CV_64F).var()))
        tile_vars = np.array(tile_vars, dtype=np.float64)
        if len(tile_vars) > 0:
            local_mean = float(tile_vars.mean())
            local_cv = float(tile_vars.std() / (local_mean + 1e-8))
        else:
            # Image smaller than one tile: neutral fallbacks (previously
            # this path produced NaN via std() of an empty array).
            local_mean = 1.0
            local_cv = 0.0
        return {
            "texture_local_mean": local_mean,
            "texture_local_cv": local_cv,
            "texture_global_lapv": global_lapv,
        }

    # -- Color saturation analysis ------------------------------------------
    def _color_analysis(self, image: "Image.Image") -> Dict[str, float]:
        """
        Colour saturation and channel-correlation statistics.

        Diffusion models tend to over-saturate mid-tone regions with vivid,
        uniform colour; real photos have a right-skewed saturation histogram
        (most pixels unsaturated, a few vivid).

          - sat_mean : mean saturation (AI tends higher)
          - sat_std  : std of saturation
          - sat_skew : saturation skewness (real: right-skewed > 0.5;
                       AI: near-uniform, lower skewness)
          - rg_corr  : R-G channel correlation (real photos inherit a
                       characteristic chromatic correlation from Bayer
                       sensor demosaicing)
        """
        rgb = np.array(image.convert("RGB"), dtype=np.float32) / 255.0
        hsv = cv2.cvtColor((rgb * 255).astype(np.uint8), cv2.COLOR_RGB2HSV)
        sat = hsv[:, :, 1].astype(np.float32) / 255.0  # in [0, 1]
        sat_mean = float(sat.mean())
        sat_std = float(sat.std())
        flat_sat = sat.flatten()
        if sat_std > 1e-6:
            sat_skew = float(np.mean(((flat_sat - sat_mean) / sat_std) ** 3))
        else:
            sat_skew = 0.0
        r = rgb[:, :, 0].flatten()
        g = rgb[:, :, 1].flatten()
        rg_corr = float(np.corrcoef(r, g)[0, 1]) if r.std() > 0 and g.std() > 0 else 0.0
        return {
            "sat_mean": sat_mean,
            "sat_std": sat_std,
            "sat_skew": sat_skew,
            "rg_corr": rg_corr,
        }

    # -- Benford's Law DCT analysis -----------------------------------------
    def _benford_analysis(self, gray: np.ndarray) -> Dict[str, float]:
        """
        Benford's-Law analysis on DCT-coefficient first digits.

        In natural signals (including DCT coefficients of real photos) the
        leading digit follows Benford's distribution:
            P(d) = log10(1 + 1/d)   for d in 1..9
        AI-generated images often produce more uniform digit distributions.

        Returns:
            benford_mse : mean squared error between the observed and the
                          ideal Benford distribution (higher = less natural
                          = more likely AI); 0.0 when too few coefficients.
        """
        all_ac_coeffs = []
        for dct_block in self._iter_dct_blocks(gray):
            ac = dct_block.flatten()[1:]  # exclude the DC coefficient (0, 0)
            # BUG FIX: filter on |coefficient| > 1, not the signed value --
            # the old `ac > 1.0` silently discarded every negative AC
            # coefficient before np.abs was applied.
            mags = np.abs(ac)
            all_ac_coeffs.extend(mags[mags > 1.0].tolist())
        if len(all_ac_coeffs) < 100:
            return {"benford_mse": 0.0}
        coeffs = np.array(all_ac_coeffs)
        # First significant digit of each coefficient magnitude.
        first_digits = np.floor(coeffs / 10.0 ** np.floor(np.log10(coeffs + 1e-12))).astype(int)
        first_digits = first_digits[(first_digits >= 1) & (first_digits <= 9)]
        if len(first_digits) < 50:
            return {"benford_mse": 0.0}
        observed = np.bincount(first_digits, minlength=10)[1:10] / len(first_digits)
        ideal = np.array([np.log10(1 + 1 / d) for d in range(1, 10)])
        return {"benford_mse": float(np.mean((observed - ideal) ** 2))}

    # -- Noise analysis -------------------------------------------------------
    def _noise_analysis(self, gray: np.ndarray) -> Dict[str, float]:
        """
        Analyse high-frequency noise patterns.

        Real cameras introduce natural photon shot noise (Poisson) and read
        noise (Gaussian); AI generators often produce too-clean or
        unnaturally patterned noise textures.
        """
        gray_u8 = np.clip(gray, 0, 255).astype(np.uint8)
        # Laplacian acts as a high-pass filter isolating the noise residual.
        laplacian = cv2.Laplacian(gray_u8, cv2.CV_64F)
        noise_var = float(laplacian.var())
        noise_mean = float(np.abs(laplacian).mean())
        # Kurtosis of the residual -- real sensor noise is close to Gaussian
        # (non-excess kurtosis ~ 3).
        flat = laplacian.flatten()
        if flat.std() > 0:
            kurt = float(np.mean(((flat - flat.mean()) / flat.std()) ** 4))
        else:
            kurt = 3.0
        return {
            "noise_variance": noise_var,
            "noise_mean_abs": noise_mean,
            "noise_kurtosis": kurt,
        }

    # -- Score computation ----------------------------------------------------
    def _compute_fake_score(self, fft_stats, dct_stats, noise_stats, ela_stats,
                            texture_stats=None, color_stats=None, benford_stats=None) -> float:
        """
        Weighted combination of all frequency/forensic features into a
        fake probability in [0, 1].  Each sub-score is clipped to [0, 1]
        before weighting; the result is normalised by the total weight.
        """
        weighted_score = 0.0
        total_weight = 0.0
        # 1. Spectral alpha [weight 1.5] -- distance from the natural ~1.8.
        alpha = fft_stats["spectral_alpha"]
        alpha_score = float(np.clip(abs(alpha - 1.8) / 1.0 - 1.0, 0.0, 1.0))
        weighted_score += 1.5 * alpha_score
        total_weight += 1.5
        # 2. High-frequency ratio [weight 1.0].
        hf = fft_stats["hf_ratio"]
        hf_score = float(np.clip((hf - 0.30) / 0.25, 0.0, 1.0))
        weighted_score += 1.0 * hf_score
        total_weight += 1.0
        # 3. Periodic artifacts [weight 0.75].
        ps = fft_stats["periodic_score"]
        ps_score = float(np.clip(ps / 0.5, 0.0, 1.0))
        weighted_score += 0.75 * ps_score
        total_weight += 0.75
        # 4. Noise kurtosis [weight 0.75] -- distance from Gaussian (3.0).
        kurt = noise_stats["noise_kurtosis"]
        kurt_score = float(np.clip(abs(kurt - 3.0) / 15.0, 0.0, 1.0))
        weighted_score += 0.75 * kurt_score
        total_weight += 0.75
        # 5. DCT coefficient variation [weight 0.75] -- low CV is suspicious.
        dct_cv = dct_stats["dct_ac_std"] / (dct_stats["dct_ac_mean"] + 1e-8)
        dct_score = float(1.0 - np.clip(dct_cv / 1.5, 0.0, 1.0))
        weighted_score += 0.75 * dct_score
        total_weight += 0.75
        # 6. ELA std + mean, both scaled by ela_weight_scale (0.25x when
        #    the image has no JPEG history -- see _ela_analysis).
        ela_scale = ela_stats.get("ela_weight_scale", 1.0)
        ela_std_score = float(np.clip((8.0 - ela_stats["ela_std"]) / 7.0, 0.0, 1.0))
        weighted_score += (1.25 * ela_scale) * ela_std_score
        total_weight += (1.25 * ela_scale)
        ela_mean_score = float(np.clip((ela_stats["ela_mean"] - 8.0) / 12.0, 0.0, 1.0))
        weighted_score += (1.0 * ela_scale) * ela_mean_score
        total_weight += (1.0 * ela_scale)
        # 7. Texture local CV [weight 1.0] -- AI images show unnaturally low
        #    tile-variance CV (globally flat texture); real: CV > 1.0.
        if texture_stats:
            lcv = texture_stats["texture_local_cv"]
            tex_score = float(np.clip((1.0 - lcv) / 0.8, 0.0, 1.0))
            weighted_score += 1.0 * tex_score
            total_weight += 1.0
        # 8. Colour saturation [weights 0.75 + 0.5] -- real photos have
        #    positively skewed saturation; AI tends uniform and vivid.
        if color_stats:
            skew = color_stats["sat_skew"]
            skew_score = float(np.clip((1.0 - skew) / 1.5, 0.0, 1.0))
            weighted_score += 0.75 * skew_score
            total_weight += 0.75
            sat_m = color_stats["sat_mean"]
            sat_score = float(np.clip((sat_m - 0.30) / 0.30, 0.0, 1.0))
            weighted_score += 0.5 * sat_score
            total_weight += 0.5
        # 9. Benford's Law MSE [weight 1.0] -- natural MSE ~ 0.0001-0.001;
        #    AI often > 0.003.
        if benford_stats:
            bmse = benford_stats["benford_mse"]
            benford_score = float(np.clip(bmse / 0.005, 0.0, 1.0))
            weighted_score += 1.0 * benford_score
            total_weight += 1.0
        return float(np.clip(weighted_score / total_weight, 0.0, 1.0))

    def predict(self, image: "Image.Image") -> Dict[str, float]:
        """Run the full frequency + ELA + texture + colour + Benford analysis.

        Returns a flat dict with "fake_prob" / "real_prob" plus the most
        informative individual statistics for display/debugging.
        """
        gray = self._preprocess(image)
        fft_s = self._fft_analysis(gray)
        dct_s = self._dct_analysis(gray)
        noise_s = self._noise_analysis(gray)
        ela_s = self._ela_analysis(image)   # uses the ORIGINAL image, not the resized gray
        texture_s = self._texture_analysis(gray)
        color_s = self._color_analysis(image)
        benford_s = self._benford_analysis(gray)
        fake_prob = self._compute_fake_score(
            fft_s, dct_s, noise_s, ela_s, texture_s, color_s, benford_s
        )
        return {
            "fake_prob": fake_prob,
            "real_prob": 1.0 - fake_prob,
            "hf_ratio": fft_s["hf_ratio"],
            "periodic_score": fft_s["periodic_score"],
            "spectral_alpha": fft_s["spectral_alpha"],
            "noise_kurtosis": noise_s["noise_kurtosis"],
            "dct_ac_mean": dct_s["dct_ac_mean"],
            "ela_mean": ela_s["ela_mean"],
            "ela_std": ela_s["ela_std"],
            "texture_local_cv": texture_s["texture_local_cv"],
            "sat_mean": color_s["sat_mean"],
            "sat_skew": color_s["sat_skew"],
            "benford_mse": benford_s["benford_mse"],
        }

    def get_fft_spectrum(self, image: "Image.Image") -> np.ndarray:
        """
        Return the log-magnitude FFT spectrum for visualisation.

        Returns:
            np.ndarray of shape (image_size, image_size), min-max
            normalised to [0, 1], dtype float32.
        """
        gray = self._preprocess(image)
        f = fftshift(fft2(gray))
        mag = np.log1p(np.abs(f))
        mag = (mag - mag.min()) / (mag.max() - mag.min() + 1e-8)
        return mag.astype(np.float32)

    def __repr__(self):
        return f"FrequencyDetector(image_size={self.image_size})"