JKrishnanandhaa's picture
Upload 54 files
770b89a verified
raw
history blame
7.54 kB
"""
Dataset-aware preprocessing for document forgery detection
Implements Critical Fix #1: Dataset-Aware Preprocessing
"""
import cv2
import numpy as np
from typing import Tuple, Optional
import pywt
from scipy import ndimage
class DocumentPreprocessor:
    """Dataset-aware preprocessing pipeline for document forgery detection.

    The pipeline converts the input to RGB, optionally deskews it, resizes it
    to a square ``image_size`` x ``image_size`` image, scales pixel values to
    [0, 1] and optionally denoises it. Deskewing and denoising can be switched
    off per dataset through the configuration object (Critical Fix #1).
    """

    # Fallback values used when a setting is missing or no config is supplied.
    DEFAULT_IMAGE_SIZE = 384
    DEFAULT_NOISE_THRESHOLD = 15.0

    def __init__(self, config, dataset_name: str):
        """
        Initialize preprocessor

        Args:
            config: Configuration object exposing ``get(key, default)``,
                ``should_skip_deskew(name)`` and ``should_skip_denoising(name)``.
                May be ``None``, in which case the built-in defaults are used
                and no step is skipped.
            dataset_name: Name of dataset (for dataset-aware processing)
        """
        self.config = config
        self.dataset_name = dataset_name

        if config is None:
            # preprocess_image() defaults to config=None; fall back to the
            # defaults instead of failing on ``None.get``.
            self.image_size = self.DEFAULT_IMAGE_SIZE
            self.noise_threshold = self.DEFAULT_NOISE_THRESHOLD
            self.skip_deskew = False
            self.skip_denoising = False
            return

        self.image_size = config.get('data.image_size', self.DEFAULT_IMAGE_SIZE)
        self.noise_threshold = config.get('preprocessing.noise_threshold',
                                          self.DEFAULT_NOISE_THRESHOLD)

        # Dataset-aware flags (Critical Fix #1)
        self.skip_deskew = config.should_skip_deskew(dataset_name)
        self.skip_denoising = config.should_skip_denoising(dataset_name)

    def __call__(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Apply preprocessing pipeline

        Args:
            image: Input image, (H, W), (H, W, 1), (H, W, 3) or (H, W, 4).
                Colour inputs are converted with the BGR->RGB / BGRA->RGB
                codes, i.e. they are treated as OpenCV-ordered channels.
            mask: Optional ground truth mask (H, W)

        Returns:
            Preprocessed float32 RGB image scaled to [0, 1] and the mask
            (``None`` when no mask was given), both resized to
            ``image_size`` x ``image_size``.
        """
        # 1. Convert to RGB
        if image.ndim == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[2] == 1:
            # Single-channel image stored with an explicit channel axis.
            image = cv2.cvtColor(np.ascontiguousarray(image[:, :, 0]),
                                 cv2.COLOR_GRAY2RGB)
        elif image.shape[2] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
        elif image.shape[2] == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # 2. Deskew (dataset-aware)
        if not self.skip_deskew:
            image, mask = self._deskew(image, mask)

        # 3. Resize
        image, mask = self._resize(image, mask)

        # 4. Normalize
        image = self._normalize(image)

        # 5. Conditional denoising (dataset-aware): only filter images whose
        #    estimated noise exceeds the configured threshold.
        if not self.skip_denoising:
            noise_level = self._estimate_noise(image)
            if noise_level > self.noise_threshold:
                image = self._denoise(image)

        return image, mask

    @staticmethod
    def _skew_angle(thetas) -> float:
        """
        Reduce Hough line angles to a single skew estimate

        Args:
            thetas: Hough ``theta`` values in radians (angle of each line's
                normal), any array-like.

        Returns:
            Median skew in degrees, in the range [-45, 45).
        """
        angles = np.degrees(np.asarray(thetas, dtype=np.float64)) - 90.0
        # Fold every angle into [-45, 45) so that vertical lines (table rules,
        # page borders), which sit 90 degrees away from the text lines, vote
        # for the same skew instead of dragging the median towards +/-90.
        angles = (angles + 45.0) % 90.0 - 45.0
        return float(np.median(angles))

    @staticmethod
    def _to_uint8(image: np.ndarray) -> np.ndarray:
        """
        Convert a [0, 1] float image to uint8

        Values are rounded (a plain cast truncates, which can lower pixels by
        one level after a float round trip) and clipped to [0, 255] so
        out-of-range inputs saturate rather than wrap around.

        Args:
            image: Input image (normalized)

        Returns:
            uint8 image with the same shape
        """
        return np.clip(np.rint(image * 255.0), 0, 255).astype(np.uint8)

    def _deskew(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Deskew document image

        The skew is estimated from Hough lines detected on the Canny edge map.
        When no lines are found, or the estimated skew is at most 0.5 degrees,
        the inputs are returned unchanged.

        Args:
            image: Input image (RGB)
            mask: Optional mask

        Returns:
            Deskewed image and mask
        """
        # Convert to grayscale for angle detection
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

        # Detect edges
        edges = cv2.Canny(gray, 50, 150, apertureSize=3)

        # Detect lines using Hough transform (result shape (N, 1, 2): rho, theta)
        lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)
        if lines is None or len(lines) == 0:
            return image, mask

        # Calculate dominant angle (median is robust to a few stray lines)
        angle = self._skew_angle(lines[:, 0, 1])

        # Only deskew if angle is significant (> 0.5 degrees)
        if abs(angle) <= 0.5:
            return image, mask

        # Get rotation matrix
        h, w = image.shape[:2]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Rotate image
        image = cv2.warpAffine(image, M, (w, h),
                               flags=cv2.INTER_CUBIC,
                               borderMode=cv2.BORDER_REPLICATE)

        # Rotate mask if provided; nearest-neighbour keeps label values intact
        # and the uncovered border is filled with 0.
        if mask is not None:
            mask = cv2.warpAffine(mask, M, (w, h),
                                  flags=cv2.INTER_NEAREST,
                                  borderMode=cv2.BORDER_CONSTANT,
                                  borderValue=0)

        return image, mask

    def _resize(self, image: np.ndarray, mask: Optional[np.ndarray] = None) -> Tuple[np.ndarray, Optional[np.ndarray]]:
        """
        Resize image and mask to target size

        Args:
            image: Input image
            mask: Optional mask

        Returns:
            Resized image and mask (``image_size`` x ``image_size``)
        """
        target_size = (self.image_size, self.image_size)

        # Resize image
        image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

        # Resize mask if provided (nearest-neighbour: no interpolated labels)
        if mask is not None:
            mask = cv2.resize(mask, target_size, interpolation=cv2.INTER_NEAREST)

        return image, mask

    def _normalize(self, image: np.ndarray) -> np.ndarray:
        """
        Normalize pixel values to [0, 1]

        Args:
            image: Input image

        Returns:
            Normalized float32 image (input divided by 255)
        """
        return image.astype(np.float32) / 255.0

    def _estimate_noise(self, image: np.ndarray) -> float:
        """
        Estimate noise level using Laplacian variance and wavelet-based estimation

        Args:
            image: Input image (normalized)

        Returns:
            Estimated noise level (mean of the two estimates)
        """
        # Convert to grayscale for noise estimation
        if image.ndim == 3:
            gray = cv2.cvtColor(self._to_uint8(image), cv2.COLOR_RGB2GRAY)
        else:
            gray = self._to_uint8(image)

        # Method 1: Laplacian variance
        laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()

        # Method 2: Wavelet-based noise estimation (median absolute value of
        # the diagonal detail coefficients divided by 0.6745)
        _, (_, _, cD) = pywt.dwt2(gray, 'db1')
        sigma = np.median(np.abs(cD)) / 0.6745

        # Combine both estimates
        # NOTE(review): the first term is a variance and the second a standard
        # deviation, so the average mixes scales - confirm the configured
        # threshold was tuned against this combined value.
        noise_level = (laplacian_var + sigma) / 2.0
        return noise_level

    def _denoise(self, image: np.ndarray) -> np.ndarray:
        """
        Apply conditional denoising

        Args:
            image: Input image (normalized)

        Returns:
            Denoised float32 image in [0, 1]
        """
        # Convert to uint8 for filtering
        image_uint8 = self._to_uint8(image)

        # Apply median filter (3x3)
        median_filtered = cv2.medianBlur(image_uint8, 3)

        # Apply Gaussian filter (3x3 kernel, sigma 0.8)
        gaussian_filtered = cv2.GaussianBlur(median_filtered, (3, 3), 0.8)

        # Convert back to float32
        return gaussian_filtered.astype(np.float32) / 255.0
def preprocess_image(image: np.ndarray,
                     mask: Optional[np.ndarray] = None,
                     config = None,
                     dataset_name: str = 'default') -> Tuple[np.ndarray, Optional[np.ndarray]]:
    """
    One-shot helper: build a DocumentPreprocessor and run it once

    Args:
        image: Input image
        mask: Optional mask
        config: Configuration object passed to DocumentPreprocessor
        dataset_name: Dataset name passed to DocumentPreprocessor

    Returns:
        Preprocessed image and mask, as returned by the preprocessor
    """
    # A fresh preprocessor is created on every call; build a
    # DocumentPreprocessor directly to reuse one across many images.
    return DocumentPreprocessor(config, dataset_name)(image, mask)