Spaces:

akcanca
/

dftest1

Sleeping

File size: 6,053 Bytes

07fe054

import numpy as np
import cv2
from scipy.fftpack import dct, fft2, fftshift
from scipy import stats

class FrequencyExtractor:
    """Extract frequency-domain (DCT and FFT) features for deepfake detection.

    Features produced by :meth:`extract_features`:

    - DCT coefficient statistics (mean and std of the absolute AC coefficients)
    - FFT radial-profile statistics (mean, std, log-radius slope and its R^2)
    - Frequency band levels (low, mid and high thirds of the radial profile)
    - Peakiness (max / mean of the radial profile), intended to expose
      upsampling artifacts
    """

    def __init__(self):
        """Create the extractor; it holds no state and takes no options."""

    def extract_features(self, image):
        """Compute DCT and FFT summary features for a single image.

        Args:
            image (PIL.Image or np.ndarray): Input image. Anything that is not
                already an ``np.ndarray`` is converted with ``np.array``.
                Accepted layouts are ``(H, W)``, ``(H, W, 1)`` and multi-channel
                ``(H, W, C)``; the latter is converted with OpenCV's
                ``COLOR_RGB2GRAY`` and is therefore treated as RGB ordered.

        Returns:
            dict: Feature name -> Python ``float``. Keys, in insertion order:
            ``dct_mean``, ``dct_std``, ``fft_radial_mean``, ``fft_radial_std``,
            ``fft_radial_decay``, ``fft_radial_decay_r2``, ``fft_low_energy``,
            ``fft_mid_energy``, ``fft_high_energy``, ``fft_peakiness``.
            FFT-based values are derived from a dB (20*log10) magnitude
            spectrum; no further normalization is applied.

        Raises:
            ValueError: If the image is empty or is not 2-D / 3-D.
        """
        gray = self._to_grayscale(image)

        features = {}
        features.update(self._dct_features(gray))

        radial_profile = self._radial_profile(gray)
        # Drop the zero-radius (DC) bin so it does not dominate the statistics,
        # unless it is the only bin available (1x1 input).
        if len(radial_profile) > 1:
            profile = radial_profile[1:]
        else:
            profile = radial_profile

        features['fft_radial_mean'] = float(np.mean(profile))
        features['fft_radial_std'] = float(np.std(profile))
        features.update(self._decay_features(profile))
        features.update(self._band_features(profile))
        return features

    @staticmethod
    def _to_grayscale(image):
        """Return a 2-D single-channel array for ``image``, validating its shape."""
        if not isinstance(image, np.ndarray):
            image = np.array(image)

        if image.size == 0:
            raise ValueError("image must contain at least one pixel")
        if image.ndim == 2:
            return image
        if image.ndim == 3:
            if image.shape[2] == 1:
                # Already single-channel; cvtColor cannot convert a 1-channel
                # array with an RGB->gray code, so just drop the channel axis.
                return image[:, :, 0]
            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        raise ValueError(
            "image must be 2-D (H, W) or 3-D (H, W, C), got shape "
            f"{image.shape}"
        )

    @staticmethod
    def _dct_features(gray):
        """Return mean/std of the absolute 2-D DCT coefficients, DC excluded."""
        # Separable 2-D DCT with orthonormal scaling: transform along axis 0
        # (via the transpose), then along axis 1.
        dct_map = dct(dct(gray.T, norm='ortho').T, norm='ortho')

        # Flattened index 0 is coefficient (0, 0), i.e. the DC term.
        dct_abs = np.abs(dct_map.flatten()[1:])
        if dct_abs.size == 0:
            # 1x1 input has no AC coefficients; report 0.0 instead of the NaN
            # (plus RuntimeWarning) that mean/std of an empty array would give.
            return {'dct_mean': 0.0, 'dct_std': 0.0}
        return {
            'dct_mean': float(np.mean(dct_abs)),
            'dct_std': float(np.std(dct_abs)),
        }

    @staticmethod
    def _radial_profile(gray):
        """Return the azimuthally averaged dB magnitude spectrum of ``gray``.

        Element ``k`` is the mean of the centered log-magnitude spectrum over
        all frequency samples whose distance from the center truncates to ``k``.
        """
        # float64 input for precision; fftshift moves the DC term to the center.
        spectrum = fftshift(fft2(gray.astype(np.float64)))
        # Log-magnitude in dB; the epsilon keeps log10 finite for zero bins.
        magnitude_spectrum = 20 * np.log10(np.abs(spectrum) + 1e-10)

        h, w = magnitude_spectrum.shape
        cy, cx = h // 2, w // 2
        y, x = np.ogrid[-cy:h - cy, -cx:w - cx]
        # Integer (truncated) radius of every sample, used as its bin index.
        r = np.sqrt(x**2 + y**2).astype(int)

        bin_sums = np.bincount(r.ravel(), magnitude_spectrum.ravel())
        bin_counts = np.bincount(r.ravel())
        # Guard against empty bins so the division never hits zero.
        return bin_sums / np.maximum(bin_counts, 1)

    @staticmethod
    def _decay_features(profile):
        """Fit ``profile`` against log(radius) and return slope and R^2.

        A negative ``fft_radial_decay`` means the profile falls off with
        frequency; a positive one means high frequencies are emphasized. Fewer
        than three samples yield 0.0 for both features.
        """
        n = len(profile)
        if n < 3:
            return {'fft_radial_decay': 0.0, 'fft_radial_decay_r2': 0.0}

        # Radii 1..n on a log axis, so a power-law decay becomes a straight line
        # and no monotonicity of the profile has to be assumed.
        log_radii = np.log(np.arange(1, n + 1, dtype=np.float64) + 1e-10)
        try:
            fit = stats.linregress(log_radii, profile)
        except (ValueError, ArithmeticError):
            # Best-effort fallback when the regression cannot be computed.
            # Only numeric failures are caught so KeyboardInterrupt/SystemExit
            # still propagate. The difference is taken as last - first so its
            # sign agrees with the slope convention (negative = decay).
            return {
                'fft_radial_decay': float(profile[-1] - profile[0]),
                'fft_radial_decay_r2': 0.0,
            }
        return {
            'fft_radial_decay': float(fit.slope),
            'fft_radial_decay_r2': float(fit.rvalue ** 2),
        }

    @staticmethod
    def _band_features(profile):
        """Return low/mid/high band means and peakiness of ``profile``.

        The profile is cut into three contiguous bands of (nearly) equal
        length. With fewer than nine samples all four features are 0.0.
        """
        n = len(profile)
        if n < 9:
            # Not enough samples for a meaningful three-way split.
            return {
                'fft_low_energy': 0.0,
                'fft_mid_energy': 0.0,
                'fft_high_energy': 0.0,
                'fft_peakiness': 0.0,
            }

        edges = np.linspace(0, n, 4, dtype=int)  # 4 edges -> 3 bands
        low_band = profile[edges[0]:edges[1]]
        mid_band = profile[edges[1]:edges[2]]
        high_band = profile[edges[2]:edges[3]]

        # Peakiness is the ratio of the profile maximum to its mean.
        # NOTE(review): the profile is in dB and may be negative or near zero,
        # in which case this ratio is hard to interpret; kept unchanged so
        # existing feature values stay comparable.
        profile_mean = np.mean(profile)
        profile_max = np.max(profile)
        return {
            'fft_low_energy': float(np.mean(low_band)),
            'fft_mid_energy': float(np.mean(mid_band)),
            'fft_high_energy': float(np.mean(high_band)),
            'fft_peakiness': float(profile_max / (profile_mean + 1e-10)),
        }