Spaces:

akcanca
/

dftest1

Sleeping

App Files Files Community

dftest1 / src /features /frequency.py

akcanca

Upload 110 files (#1)

07fe054 verified 3 months ago

raw

history blame contribute delete

6.05 kB

	import numpy as np
	import cv2
	from scipy.fftpack import dct, fft2, fftshift
	from scipy import stats

	class FrequencyExtractor:
	"""
	Extracts frequency domain features (DCT, FFT) for deepfake detection.

	Features include:
	- DCT coefficient statistics (mean, std)
	- FFT radial profile statistics (mean, std, decay rate)
	- Frequency band energies (low, mid, high)
	- Peakiness metric for detecting upsampling artifacts
	"""

	def __init__(self):
	pass

	def extract_features(self, image):
	"""
	Extracts frequency domain features (DCT, FFT).

	Args:
	image (PIL.Image or np.ndarray): Input image.

	Returns:
	dict: Dictionary of features with normalized values.
	"""
	if not isinstance(image, np.ndarray):
	image = np.array(image)

	if len(image.shape) == 3:
	gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
	else:
	gray = image

	features = {}

	# --- DCT Features ---
	# Compute 2D DCT (orthonormal normalization)
	dct_map = dct(dct(gray.T, norm='ortho').T, norm='ortho')

	# Histogram of DCT coefficients (excluding DC component)
	dct_coeffs = dct_map.flatten()
	dct_coeffs = dct_coeffs[1:] # Remove DC component

	# Statistics on DCT coefficients (using absolute values)
	dct_abs = np.abs(dct_coeffs)
	features['dct_mean'] = float(np.mean(dct_abs))
	features['dct_std'] = float(np.std(dct_abs))

	# --- FFT Features ---
	# Compute 2D FFT and shift to center DC component
	f = fft2(gray.astype(np.float64)) # Use float64 for better precision
	fshift = fftshift(f)

	# Use log-magnitude spectrum (20*log10) for consistent normalization
	# This matches the noiseprint extractor and provides better dynamic range
	magnitude_spectrum = 20 * np.log10(np.abs(fshift) + 1e-10)

	# Azimuthal average (Radial Profile)
	# This computes the average magnitude at each radial distance from center
	h, w = magnitude_spectrum.shape
	cy, cx = h // 2, w // 2
	y, x = np.ogrid[-cy:h-cy, -cx:w-cx]
	r = np.sqrt(x2 + y2)
	r = r.astype(int)

	# Compute radial profile: average magnitude at each radius
	tbin = np.bincount(r.ravel(), magnitude_spectrum.ravel())
	nr = np.bincount(r.ravel())
	radial_profile = tbin / np.maximum(nr, 1)

	# Remove zero-radius (DC component) for better statistics
	if len(radial_profile) > 1:
	radial_profile_nonzero = radial_profile[1:]
	else:
	radial_profile_nonzero = radial_profile

	# Summary stats of radial profile
	features['fft_radial_mean'] = float(np.mean(radial_profile_nonzero))
	features['fft_radial_std'] = float(np.std(radial_profile_nonzero))

	# Improved radial decay metric: fit linear slope instead of assuming monotonic decay
	# This is more robust to non-monotonic profiles (e.g., peaks at intermediate frequencies)
	n = len(radial_profile_nonzero)
	if n >= 3:
	# Fit linear regression to log(radius) vs magnitude to estimate decay rate
	# This captures the overall trend without assuming monotonicity
	radii = np.arange(1, n + 1, dtype=np.float64)
	# Use log(radius) to better capture power-law decay
	log_radii = np.log(radii + 1e-10)

	# Fit linear model: magnitude = a * log(radius) + b
	# Negative slope indicates decay (typical for natural images)
	# Positive slope indicates high-frequency emphasis (typical for upsampled images)
	try:
	slope, intercept, r_value, p_value, std_err = stats.linregress(
	log_radii, radial_profile_nonzero
	)
	features['fft_radial_decay'] = float(slope) # Decay rate (negative = decay)
	features['fft_radial_decay_r2'] = float(r_value**2) # Goodness of fit
	except:
	# Fallback: simple difference if regression fails
	features['fft_radial_decay'] = float(
	radial_profile_nonzero[0] - radial_profile_nonzero[-1]
	)
	features['fft_radial_decay_r2'] = 0.0
	else:
	features['fft_radial_decay'] = 0.0
	features['fft_radial_decay_r2'] = 0.0

	# Frequency band energies (low, mid, high)
	if n >= 9: # Ensure enough samples for band division
	# Divide radial profile into 3 bands: low, mid, high frequency
	edges = np.linspace(0, n, 4, dtype=int) # 3 bands
	low_band = radial_profile_nonzero[edges[0]:edges[1]]
	mid_band = radial_profile_nonzero[edges[1]:edges[2]]
	high_band = radial_profile_nonzero[edges[2]:edges[3]]

	features['fft_low_energy'] = float(np.mean(low_band))
	features['fft_mid_energy'] = float(np.mean(mid_band))
	features['fft_high_energy'] = float(np.mean(high_band))

	# Peakiness: ratio of max to mean (detects sharp peaks from upsampling)
	# High peakiness indicates periodic patterns (upsampling artifacts)
	profile_mean = np.mean(radial_profile_nonzero)
	profile_max = np.max(radial_profile_nonzero)
	features['fft_peakiness'] = float(profile_max / (profile_mean + 1e-10))
	else:
	# Not enough samples for band analysis
	features['fft_low_energy'] = 0.0
	features['fft_mid_energy'] = 0.0
	features['fft_high_energy'] = 0.0
	features['fft_peakiness'] = 0.0

	return features