Spaces:
Sleeping
Sleeping
File size: 6,053 Bytes
07fe054 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | import numpy as np
import cv2
from scipy.fftpack import dct, fft2, fftshift
from scipy import stats
class FrequencyExtractor:
    """
    Extracts frequency domain features (DCT, FFT) for deepfake detection.

    Features include:
    - DCT coefficient statistics (mean, std of absolute AC coefficients)
    - FFT radial profile statistics (mean, std, decay rate, fit quality)
    - Frequency band energies (low, mid, high)
    - Peakiness metric for detecting upsampling artifacts

    All FFT features are computed on the log-magnitude spectrum
    (20 * log10(|F| + 1e-10)), i.e. they are expressed in dB.
    """

    # Minimum number of radial samples needed for the log-radius line fit.
    _MIN_SAMPLES_FOR_FIT = 3
    # Minimum number of radial samples needed to form three frequency bands.
    _MIN_SAMPLES_FOR_BANDS = 9

    def __init__(self):
        pass

    def extract_features(self, image):
        """
        Extracts frequency domain features (DCT, FFT).

        Args:
            image (PIL.Image or np.ndarray): Input image. Either a 2-D
                grayscale array, or a 3-D array whose last axis holds the
                channels. Multi-channel input is converted with
                cv2.COLOR_RGB2GRAY; a single-channel 3-D array (H, W, 1)
                is used as grayscale directly.

        Returns:
            dict: Feature name -> Python float. Keys, in insertion order:
                dct_mean, dct_std, fft_radial_mean, fft_radial_std,
                fft_radial_decay, fft_radial_decay_r2, fft_low_energy,
                fft_mid_energy, fft_high_energy, fft_peakiness.
                Values are raw statistics (not rescaled to a fixed range).
                Features that cannot be computed because the image is too
                small are reported as 0.0.

        Raises:
            ValueError: If the image is empty or is not 2-D / 3-D.
        """
        gray = self._to_grayscale(image)

        features = {}
        features.update(self._dct_features(gray))

        profile = self._radial_profile(gray)
        features['fft_radial_mean'] = float(np.mean(profile))
        features['fft_radial_std'] = float(np.std(profile))

        features.update(self._decay_features(profile))
        features.update(self._band_features(profile))
        return features

    @staticmethod
    def _to_grayscale(image):
        """Return a non-empty 2-D grayscale array for the given image."""
        if not isinstance(image, np.ndarray):
            image = np.array(image)

        if image.size == 0:
            raise ValueError("image must not be empty")

        if image.ndim == 2:
            return image
        if image.ndim == 3:
            if image.shape[2] == 1:
                # A lone channel is already grayscale; cvtColor with
                # COLOR_RGB2GRAY expects colour channels, so just drop the axis.
                return image[:, :, 0]
            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        raise ValueError(
            "image must be a 2-D or 3-D array, got %d dimension(s)" % image.ndim
        )

    @staticmethod
    def _dct_features(gray):
        """Mean/std of the absolute 2-D DCT coefficients, excluding DC."""
        # Separable 2-D DCT with orthonormal scaling: transform one axis,
        # transpose, transform the other.
        dct_map = dct(dct(gray.T, norm='ortho').T, norm='ortho')

        # The first flattened entry is the [0, 0] (DC) coefficient.
        ac_abs = np.abs(dct_map.flatten()[1:])

        if ac_abs.size == 0:
            # 1x1 image: there are no AC coefficients. Report 0.0 instead of
            # the NaN (plus RuntimeWarning) that np.mean gives on an empty array.
            return {'dct_mean': 0.0, 'dct_std': 0.0}

        return {
            'dct_mean': float(np.mean(ac_abs)),
            'dct_std': float(np.std(ac_abs)),
        }

    @staticmethod
    def _radial_profile(gray):
        """Azimuthally averaged log-magnitude FFT spectrum, DC bin removed.

        Returns a 1-D array whose i-th entry is the mean dB magnitude of all
        spectrum samples whose integer-truncated distance from the centre is
        i + 1. If the spectrum only has the DC bin, that single bin is
        returned unchanged.
        """
        # float64 input for precision; fftshift moves DC to the centre.
        spectrum = fftshift(fft2(gray.astype(np.float64)))
        # The small epsilon keeps log10 finite where the magnitude is zero.
        magnitude_db = 20 * np.log10(np.abs(spectrum) + 1e-10)

        h, w = magnitude_db.shape
        cy, cx = h // 2, w // 2
        y, x = np.ogrid[-cy:h - cy, -cx:w - cx]
        # Truncate the Euclidean distance to an integer bin index.
        radius = np.sqrt(x**2 + y**2).astype(int)

        flat_radius = radius.ravel()
        bin_sums = np.bincount(flat_radius, magnitude_db.ravel())
        bin_counts = np.bincount(flat_radius)
        # Guard against division by zero for radii that have no samples.
        profile = bin_sums / np.maximum(bin_counts, 1)

        if len(profile) > 1:
            return profile[1:]
        return profile

    @classmethod
    def _decay_features(cls, profile):
        """Slope and R^2 of a linear fit of dB magnitude against log(radius).

        A negative slope means the spectrum decays with frequency; a positive
        slope means high frequencies are emphasised.
        """
        n = len(profile)
        if n < cls._MIN_SAMPLES_FOR_FIT:
            return {'fft_radial_decay': 0.0, 'fft_radial_decay_r2': 0.0}

        # Radii start at 1 because the DC bin was dropped from the profile.
        radii = np.arange(1, n + 1, dtype=np.float64)
        log_radii = np.log(radii + 1e-10)

        try:
            fit = stats.linregress(log_radii, profile)
        except (ValueError, FloatingPointError):
            # Fallback: simple first-minus-last difference if regression fails.
            # Only regression-related errors are caught, so KeyboardInterrupt
            # and genuine programming errors still propagate.
            return {
                'fft_radial_decay': float(profile[0] - profile[-1]),
                'fft_radial_decay_r2': 0.0,
            }

        return {
            'fft_radial_decay': float(fit.slope),  # negative = decay
            'fft_radial_decay_r2': float(fit.rvalue ** 2),  # goodness of fit
        }

    @classmethod
    def _band_features(cls, profile):
        """Mean dB level in three equal-width radial bands, plus peakiness."""
        n = len(profile)
        if n < cls._MIN_SAMPLES_FOR_BANDS:
            # Not enough samples for band analysis.
            return {
                'fft_low_energy': 0.0,
                'fft_mid_energy': 0.0,
                'fft_high_energy': 0.0,
                'fft_peakiness': 0.0,
            }

        # Four integer edges split the profile into low / mid / high bands.
        edges = np.linspace(0, n, 4, dtype=int)
        low_band = profile[edges[0]:edges[1]]
        mid_band = profile[edges[1]:edges[2]]
        high_band = profile[edges[2]:edges[3]]

        # Peakiness: ratio of max to mean of the radial profile.
        # NOTE(review): the profile is in dB and may be negative or close to
        # zero, in which case this ratio is hard to interpret. The formula is
        # kept as-is so existing feature values do not change.
        profile_mean = np.mean(profile)
        profile_max = np.max(profile)

        return {
            'fft_low_energy': float(np.mean(low_band)),
            'fft_mid_energy': float(np.mean(mid_band)),
            'fft_high_energy': float(np.mean(high_band)),
            'fft_peakiness': float(profile_max / (profile_mean + 1e-10)),
        }
|