# dftest1/src/features/additional_features.py
# (Hugging Face upload header: akcanca — "Upload 110 files (#1)", commit 07fe054 verified)
"""
Additional Forensic Feature Extractors
Implements CFA (Color Filter Array) pattern consistency and defocus map analysis
for deepfake detection.
References:
- Kirchner & Gloe "Efficient Estimation of CFA Pattern Configuration" (SPIE 2010)
- "Unlocking Defocus Maps for Deepfake Detection" (arXiv:2509.23289)
"""
import numpy as np
import cv2
from scipy import ndimage
from scipy.stats import entropy
from typing import Dict, Tuple, Optional
class CFAExtractor:
    """
    Extracts CFA (Color Filter Array) pattern consistency features.

    Real camera images have consistent CFA demosaicing patterns (Bayer,
    X-Trans, etc.), while synthetic images may lack these patterns or show
    inconsistencies.

    Reference: Kirchner & Gloe, "Efficient Estimation of CFA Pattern
    Configuration" (SPIE 2010).
    """

    # Deterministic numeric codes for the 'cfa_pattern' feature. The original
    # hash(pattern) % 1000 was not stable across interpreter runs because of
    # Python's string-hash randomization (PYTHONHASHSEED).
    PATTERN_CODES = {'RGGB': 0, 'GRBG': 1, 'GBRG': 2, 'BGGR': 3}

    def __init__(self):
        # Common Bayer patterns. Each value lists the (row, col) offsets of
        # the (red, green-1, green-2, blue) sensor sites within a 2x2 tile.
        self.bayer_patterns = {
            'RGGB': ((0, 0), (0, 1), (1, 0), (1, 1)),
            'GRBG': ((0, 1), (0, 0), (1, 1), (1, 0)),
            'GBRG': ((1, 0), (1, 1), (0, 0), (0, 1)),
            'BGGR': ((1, 1), (1, 0), (0, 1), (0, 0)),
        }

    def _estimate_bayer_pattern(self, image: np.ndarray) -> Tuple[str, float]:
        """
        Estimate the most likely Bayer pattern from high-pass residual energy.

        In a demosaiced image, pixels at the positions actually sampled by the
        sensor retain more high-frequency energy than interpolated pixels
        (Kirchner & Gloe 2010). For each candidate pattern we score the
        fraction of each channel's residual energy that sits at the positions
        that pattern claims were sampled. (The original implementation never
        used the per-pattern offsets, so every pattern received an identical
        score.)

        Args:
            image: RGB image array (H, W, 3)

        Returns:
            Tuple of (pattern_name, confidence_score in [0, 1]); a random
            image scores ~1/3, a genuine pattern match approaches 1.
        """
        h, w = image.shape[:2]
        # Crop to even dimensions so the four 2x2 phase sub-grids align.
        h2, w2 = h - (h % 2), w - (w % 2)
        if h2 < 2 or w2 < 2:
            return 'RGGB', 0.0
        pixels = image[:h2, :w2].astype(np.float32)
        phases = ((0, 0), (0, 1), (1, 0), (1, 1))
        # energy[c][(oi, oj)] = mean squared high-pass residual of channel c
        # restricted to the (oi, oj) phase of the 2x2 Bayer lattice.
        energy = []
        for c in range(3):
            channel = pixels[:, :, c]
            # Simple high-pass: channel minus a 3x3 local average.
            residual = channel - ndimage.uniform_filter(channel, size=3)
            energy.append({
                off: float(np.mean(residual[off[0]::2, off[1]::2] ** 2))
                for off in phases
            })
        totals = [sum(e.values()) for e in energy]
        best_pattern, best_score = 'RGGB', 0.0
        for pattern_name, (r_pos, g1_pos, g2_pos, b_pos) in self.bayer_patterns.items():
            # Fraction of each channel's residual energy at its claimed
            # sampling sites; flat channels (zero total energy) are skipped
            # rather than divided by zero.
            parts = []
            if totals[0] > 1e-12:
                parts.append(energy[0][r_pos] / totals[0])
            if totals[1] > 1e-12:
                parts.append((energy[1][g1_pos] + energy[1][g2_pos]) / totals[1])
            if totals[2] > 1e-12:
                parts.append(energy[2][b_pos] / totals[2])
            score = float(np.mean(parts)) if parts else 0.0
            if score > best_score:
                best_score = score
                best_pattern = pattern_name
        return best_pattern, float(best_score)

    def _compute_demosaicing_consistency(self, image: np.ndarray) -> float:
        """
        Compute spatial consistency of demosaicing patterns.

        Real images have consistent demosaicing artifacts, while synthetic
        images may lack these or show inconsistencies. Consistency is measured
        as the (inverted) coefficient of variation of per-2x2-block gradient
        variance across the image.

        Args:
            image: RGB image array (H, W, 3)

        Returns:
            Consistency score in (0, 1] (higher = more consistent), or 0.0
            when the image is too small or essentially flat.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY).astype(np.float32)
        # Gradients in both directions; CFA interpolation leaves periodic
        # structure at the 2x2 block scale.
        grad_x = cv2.Sobel(gray, cv2.CV_32F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray, cv2.CV_32F, 0, 1, ksize=3)
        h, w = gray.shape
        bh, bw = h // 2, w // 2
        if bh == 0 or bw == 0:
            return 0.0

        def block_var(g: np.ndarray) -> np.ndarray:
            # Vectorized variance of every non-overlapping 2x2 block. This
            # also covers the last block row/column, which the original
            # Python loop (range(0, h - 2, 2)) silently dropped.
            blocks = g[:bh * 2, :bw * 2].reshape(bh, 2, bw, 2)
            return blocks.var(axis=(1, 3))

        scores = block_var(grad_x) + block_var(grad_y)
        mean_score = float(np.mean(scores))
        if mean_score < 1e-6:
            # Essentially flat image: no demosaicing structure to measure.
            return 0.0
        cv_score = float(np.std(scores)) / (mean_score + 1e-6)
        # Lower coefficient of variation => more spatially consistent.
        return float(1.0 / (1.0 + cv_score))

    def extract_features(self, image: np.ndarray) -> Dict[str, float]:
        """
        Extract CFA pattern consistency features.

        Args:
            image: RGB image array (H, W, 3), a PIL Image, a grayscale
                (H, W) array, or an array with extra channels (e.g. RGBA).

        Returns:
            Dictionary of CFA features:
              - cfa_pattern_confidence: score of the best-matching pattern
              - cfa_consistency_score: spatial demosaicing consistency
              - cfa_anomalies_detected: 1.0 if consistency is suspiciously low
              - cfa_pattern: deterministic numeric code of the detected pattern
        """
        if not isinstance(image, np.ndarray):
            # np.array() converts PIL Images directly; no PIL import needed.
            image = np.array(image)
        if len(image.shape) != 3 or image.shape[2] != 3:
            if len(image.shape) == 2:
                image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            else:
                image = image[:, :, :3]  # drop alpha / extra channels
        pattern, pattern_confidence = self._estimate_bayer_pattern(image)
        consistency_score = self._compute_demosaicing_consistency(image)
        # Low spatial consistency of demosaicing artifacts suggests synthesis.
        anomalies_detected = consistency_score < 0.3
        return {
            'cfa_pattern_confidence': pattern_confidence,
            'cfa_consistency_score': consistency_score,
            'cfa_anomalies_detected': float(anomalies_detected),
            # Stable across runs, unlike hash(pattern) % 1000.
            'cfa_pattern': float(self.PATTERN_CODES.get(pattern, -1)),
        }
class DefocusExtractor:
    """
    Extracts defocus map features for depth-of-field consistency analysis.

    Real images have consistent defocus patterns based on scene depth, while
    synthetic images may show inconsistent or unnatural defocus.

    Reference: "Unlocking Defocus Maps for Deepfake Detection"
    (arXiv:2509.23289)
    """

    def __init__(self):
        # Stateless extractor; nothing to configure.
        pass

    def _estimate_defocus_map(self, image: np.ndarray) -> np.ndarray:
        """
        Estimate a defocus map using an edge-sharpness heuristic.

        Defocus blurs edges, so low Laplacian response is treated as defocus.

        Args:
            image: Grayscale (H, W) or RGB (H, W, 3) image array (the
                original docstring claimed grayscale only, but the code
                handles both).

        Returns:
            float32 defocus map (H, W); higher values indicate more defocus.
        """
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY).astype(np.float32)
        else:
            gray = image.astype(np.float32)
        # Edge strength via Laplacian; defocus suppresses edges, so invert
        # (higher map value = weaker edges = more defocus).
        edge_strength = np.abs(cv2.Laplacian(gray, cv2.CV_32F, ksize=3))
        defocus_map = 255.0 - np.clip(edge_strength * 10, 0, 255)
        # Light smoothing to suppress per-pixel noise in the map.
        return cv2.GaussianBlur(defocus_map, (5, 5), 1.0)

    def _compute_consistency_score(self, defocus_map: np.ndarray) -> float:
        """
        Compute spatial consistency of the defocus map.

        Real images have smooth, consistent defocus transitions, while
        synthetic images may show abrupt or inconsistent changes, which show
        up as large gradients in the defocus map.

        Args:
            defocus_map: Defocus map array (H, W)

        Returns:
            Consistency score in (0, 1] (higher = more consistent).
        """
        grad_x = cv2.Sobel(defocus_map, cv2.CV_32F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(defocus_map, cv2.CV_32F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
        mean_gradient = np.mean(gradient_magnitude)
        std_gradient = np.std(gradient_magnitude)
        if mean_gradient < 1e-6:
            # Perfectly flat defocus map: maximally consistent.
            return 1.0
        # Normalize by the 8-bit value range and invert: lower mean gradient
        # and lower spread => higher consistency.
        consistency = 1.0 / (1.0 + mean_gradient / 255.0 + std_gradient / 255.0)
        return float(consistency)

    def _detect_anomalies(self, defocus_map: np.ndarray,
                          consistency_score: float = 0.0) -> Tuple[list, float]:
        """
        Detect anomalous regions (locally inconsistent defocus) in the map.

        Args:
            defocus_map: Defocus map array (H, W)
            consistency_score: Overall consistency score. Currently unused;
                kept (with a default) for signature compatibility.

        Returns:
            Tuple of (anomaly_regions as [x, y, w, h] bboxes, anomaly_score)
        """
        kernel_size = 15
        # Local variance via two box filters: E[(x - E[x])^2] per window.
        local_mean = cv2.blur(defocus_map, (kernel_size, kernel_size))
        local_var = cv2.blur((defocus_map - local_mean) ** 2,
                             (kernel_size, kernel_size))
        # Relative threshold: flag the ~5% most variable regions. The small
        # absolute floor prevents a near-uniform variance map from flagging
        # pure numerical noise as anomalous.
        threshold = max(float(np.percentile(local_var, 95)), 1e-6)
        anomaly_mask = local_var > threshold
        num_labels, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
            anomaly_mask.astype(np.uint8), connectivity=8
        )
        anomaly_regions = []
        min_area = defocus_map.size * 0.01  # ignore blobs below 1% of image
        for label in range(1, num_labels):  # label 0 is the background
            if stats[label, cv2.CC_STAT_AREA] > min_area:
                anomaly_regions.append([
                    int(stats[label, cv2.CC_STAT_LEFT]),
                    int(stats[label, cv2.CC_STAT_TOP]),
                    int(stats[label, cv2.CC_STAT_WIDTH]),
                    int(stats[label, cv2.CC_STAT_HEIGHT]),
                ])
        # Overall score: fraction of pixels flagged as anomalous.
        anomaly_score = float(np.sum(anomaly_mask) / defocus_map.size)
        return anomaly_regions, anomaly_score

    def extract_features(self, image: np.ndarray) -> Dict[str, float]:
        """
        Extract defocus map features.

        Args:
            image: RGB image array (H, W, 3), grayscale array, or PIL Image

        Returns:
            Dictionary of defocus features (consistency, anomaly statistics,
            and basic defocus-map statistics).
        """
        if not isinstance(image, np.ndarray):
            # np.array() converts PIL Images directly.
            image = np.array(image)
        # Defocus estimation operates on luminance only.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            gray = image
        defocus_map = self._estimate_defocus_map(gray)
        consistency_score = self._compute_consistency_score(defocus_map)
        anomaly_regions, anomaly_score = self._detect_anomalies(
            defocus_map, consistency_score
        )
        # scipy.stats.entropy normalizes its input to a probability
        # distribution; the 1e-10 keeps all-zero maps well-defined.
        defocus_entropy = float(entropy(defocus_map.flatten() + 1e-10))
        return {
            'defocus_consistency_score': consistency_score,
            'defocus_anomaly_score': anomaly_score,
            'defocus_mean': float(np.mean(defocus_map)),
            'defocus_std': float(np.std(defocus_map)),
            'defocus_entropy': defocus_entropy,
            'defocus_n_anomalies': float(len(anomaly_regions)),
            'defocus_anomalies_detected': float(len(anomaly_regions) > 0),
        }
def extract_additional_features(image_path: str,
                                feature_types: Optional[list] = None) -> Dict:
    """
    Extract additional forensic features (CFA, defocus, etc.) from an image.

    Args:
        image_path: Path to image file
        feature_types: List of feature types to extract (e.g., ['cfa',
            'defocus']). If None, extracts all available features.

    Returns:
        On success: {'status': 'completed', 'image_path': ..., 'features': {...}}
        where each extractor's entry is either its feature dict or a
        per-extractor error record.
        On load failure: {'status': 'error', 'error': ...}.
    """
    # Import inside the try so a missing/broken Pillow install degrades to
    # the same structured error dict as an unreadable file, instead of
    # raising an uncaught ImportError.
    try:
        from PIL import Image
        image_np = np.array(Image.open(image_path).convert('RGB'))
    except Exception as e:
        return {
            'status': 'error',
            'error': f'Failed to load image: {str(e)}'
        }
    if feature_types is None:
        feature_types = ['cfa', 'defocus']
    results = {
        'status': 'completed',
        'image_path': image_path,
        'features': {}
    }
    # Each extractor runs independently; a failure in one must not discard
    # the results of the other.
    if 'cfa' in feature_types:
        try:
            results['features']['cfa'] = CFAExtractor().extract_features(image_np)
        except Exception as e:
            results['features']['cfa'] = {
                'status': 'error',
                'error': str(e)
            }
    if 'defocus' in feature_types:
        try:
            results['features']['defocus'] = DefocusExtractor().extract_features(image_np)
        except Exception as e:
            results['features']['defocus'] = {
                'status': 'error',
                'error': str(e)
            }
    return results