"""
RF-DETR Optimized Preprocessing

This module provides preprocessing specifically optimized for the RF-DETR model.
Unlike generic preprocessing, this version preserves the pixel value
distributions expected by RF-DETR's ImageNet normalization
(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).

Key Principles:
1. Denoise to remove compression artifacts WITHOUT changing distributions
2. Color harmonization for cross-device consistency
3. PRESERVE global mean/std values for ImageNet normalization compatibility
4. Gentle adjustments only (no aggressive CLAHE or histogram equalization)

Differences from generic preprocessing:
- Generic: Aggressive normalization, CLAHE, brightness adjustment
- RF-DETR optimized: Gentle denoising, color balance, distribution-preserving
"""

from pathlib import Path
from typing import Union, Tuple, Optional

import cv2
import numpy as np
from PIL import Image


class RFDETRPreprocessor:
    """
    Preprocessing optimized specifically for the RF-DETR model.

    Focuses on:
    - Denoising compression artifacts
    - Cross-device color consistency
    - Preserving pixel value distributions for ImageNet normalization

    Each step is optional and controlled by a constructor flag; see
    ``preprocess`` for the order in which the steps run.
    """

    # ImageNet normalization values used by RF-DETR (0-1 scale, RGB order).
    IMAGENET_MEAN = [0.485, 0.456, 0.406]  # Expected by RF-DETR
    IMAGENET_STD = [0.229, 0.224, 0.225]  # Expected by RF-DETR

    def __init__(
        self,
        denoise: bool = True,
        color_balance: bool = True,
        preserve_distribution: bool = True,
        denoise_strength: int = 5  # Gentle by default
    ) -> None:
        """
        Initialize the RF-DETR optimized preprocessor.

        Args:
            denoise: Remove JPEG/PNG compression artifacts.
            color_balance: Balance colors for cross-device consistency.
            preserve_distribution: Restore the per-channel mean/std of the
                loaded image after the other steps have run.
            denoise_strength: Denoising strength (intended range 1-10,
                lower=gentler). Used as the bilateral filter diameter and,
                multiplied by 10, as both filter sigmas. The value is not
                validated here.
        """
        self.denoise = denoise
        self.color_balance = color_balance
        self.preserve_distribution = preserve_distribution
        self.denoise_strength = denoise_strength

    def preprocess(self, image: Union[str, Path, np.ndarray, Image.Image]) -> np.ndarray:
        """
        Apply RF-DETR optimized preprocessing.

        Steps, each gated by its constructor flag: bilateral denoising,
        gray-world color balance, then restoration of the per-channel
        mean/std measured on the image as loaded.

        Args:
            image: Input image (path, PIL image, or numpy array).

        Returns:
            Preprocessed numpy array in RGB format, ready for RF-DETR.

        Raises:
            ValueError: If ``image`` is of an unsupported type or shape.
        """
        img_array = self._load_image(image)

        # Capture the statistics BEFORE any step runs, so that the final
        # restoration targets the untouched input distribution.
        if self.preserve_distribution:
            original_mean = np.mean(img_array, axis=(0, 1))
            original_std = np.std(img_array, axis=(0, 1))

        # 1. Gentle denoising
        if self.denoise:
            img_array = self._gentle_denoise(img_array)

        # 2. Color balance for cross-device consistency
        if self.color_balance:
            img_array = self._balance_colors(img_array)

        # 3. Restore original distribution if needed
        if self.preserve_distribution:
            img_array = self._restore_distribution(
                img_array, original_mean, original_std
            )

        return img_array

    def _load_image(self, image: Union[str, Path, np.ndarray, Image.Image]) -> np.ndarray:
        """
        Load an image from one of the supported input formats as RGB.

        Args:
            image: A file path (``str`` or ``Path``), a PIL image, or a numpy
                array shaped (H, W), (H, W, 3) or (H, W, 4). Three- and
                four-channel arrays are treated as RGB / RGBA respectively.

        Returns:
            A three-channel numpy array. Array inputs are never returned by
            reference: three-channel arrays are copied, others are converted.

        Raises:
            ValueError: If ``image`` has an unsupported type, or is an array
                whose shape is not one of the layouts listed above.
        """
        if isinstance(image, (str, Path)):
            # The context manager releases the file handle once the pixel
            # data has been converted, instead of leaving it to the GC.
            with Image.open(image) as pil_image:
                return np.array(pil_image.convert('RGB'))

        if isinstance(image, Image.Image):
            return np.array(image.convert('RGB'))

        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            if image.ndim == 3:
                channels = image.shape[2]
                if channels == 4:
                    return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
                if channels == 3:
                    return image.copy()
            # Any other layout used to fall through and return None (or
            # raise IndexError for ndim < 2); fail loudly instead.
            raise ValueError(
                f"Unsupported image array shape: {image.shape}. "
                "Expected (H, W), (H, W, 3) or (H, W, 4)"
            )

        raise ValueError(f"Unsupported image type: {type(image)}")

    def _gentle_denoise(self, img: np.ndarray) -> np.ndarray:
        """
        Gently denoise an RGB image with a bilateral filter.

        The bilateral filter is used because it smooths noise while
        preserving edges. ``denoise_strength`` sets the filter diameter, and
        ten times that value is used for both the color and spatial sigmas.

        Args:
            img: RGB image array.

        Returns:
            Denoised RGB image array.
        """
        # Convert RGB to BGR for OpenCV
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        sigma = self.denoise_strength * 10
        denoised = cv2.bilateralFilter(
            img_bgr,
            d=self.denoise_strength,  # Diameter
            sigmaColor=sigma,
            sigmaSpace=sigma
        )

        # Convert back to RGB
        return cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB)

    def _balance_colors(self, img: np.ndarray) -> np.ndarray:
        """
        Balance colors for cross-device consistency.

        Uses the gray world assumption: the average color should be gray.
        Each channel is scaled toward the mean of the three channel means,
        but only by 50% of the full correction, to preserve the original
        look.

        Args:
            img: RGB image array with three channels.

        Returns:
            Color-balanced image as ``uint8``, clipped to [0, 255].
        """
        # Calculate mean for each channel
        mean_r = np.mean(img[:, :, 0])
        mean_g = np.mean(img[:, :, 1])
        mean_b = np.mean(img[:, :, 2])

        # Calculate gray average
        gray_avg = (mean_r + mean_g + mean_b) / 3.0

        # Gentle color balance (only 50% correction to preserve original look)
        alpha = 0.5

        # astype() already returns a new array, so the input is not mutated.
        img_balanced = img.astype(np.float32)

        for channel, channel_mean in enumerate((mean_r, mean_g, mean_b)):
            # An all-zero channel has no meaningful gain; leave it untouched
            # rather than dividing by zero.
            if channel_mean > 0:
                img_balanced[:, :, channel] = img_balanced[:, :, channel] * (
                    1 - alpha + alpha * gray_avg / channel_mean
                )

        # Clip to valid range
        return np.clip(img_balanced, 0, 255).astype(np.uint8)

    def _restore_distribution(
        self,
        img: np.ndarray,
        target_mean: np.ndarray,
        target_std: np.ndarray
    ) -> np.ndarray:
        """
        Restore a target per-channel mean/std distribution.

        Each channel is standardized to zero mean and unit std, then
        rescaled to the target statistics. Channels whose current std is
        effectively zero are left unchanged. The final clip to [0, 255] and
        cast to ``uint8`` mean the resulting statistics match the targets
        only approximately.

        Args:
            img: Image array with three channels.
            target_mean: Per-channel target means (length 3).
            target_std: Per-channel target standard deviations (length 3).

        Returns:
            Adjusted image as ``uint8``, clipped to [0, 255].
        """
        img_float = img.astype(np.float32)

        # Calculate current statistics
        current_mean = np.mean(img_float, axis=(0, 1))
        current_std = np.std(img_float, axis=(0, 1))

        # Restore distribution for each channel
        for c in range(3):
            if current_std[c] > 1e-6:  # Avoid division by zero
                # Standardize to zero mean, unit std
                img_float[:, :, c] = (img_float[:, :, c] - current_mean[c]) / current_std[c]
                # Restore original distribution
                img_float[:, :, c] = img_float[:, :, c] * target_std[c] + target_mean[c]

        # Clip to valid range
        return np.clip(img_float, 0, 255).astype(np.uint8)


# Preset configurations for RF-DETR.
# These are shared module-level instances looked up by preprocess_for_rfdetr().
RFDETR_PRESETS = {
    "gentle": RFDETRPreprocessor(
        denoise=True,
        color_balance=False,
        preserve_distribution=True,
        denoise_strength=3  # Very gentle
    ),
    "standard": RFDETRPreprocessor(
        denoise=True,
        color_balance=True,
        preserve_distribution=True,
        denoise_strength=5  # Moderate
    ),
    "aggressive_denoise": RFDETRPreprocessor(
        denoise=True,
        color_balance=True,
        preserve_distribution=True,
        denoise_strength=8  # Strong denoising
    ),
    "color_only": RFDETRPreprocessor(
        denoise=False,
        color_balance=True,
        preserve_distribution=True,
        denoise_strength=0  # Unused because denoise=False
    ),
}


def preprocess_for_rfdetr(
    image: Union[str, Path, np.ndarray, Image.Image],
    preset: str = "standard"
) -> np.ndarray:
    """
    Convenience function for RF-DETR optimized preprocessing.

    Args:
        image: Input image (path, PIL image, or numpy array).
        preset: Preprocessing preset optimized for RF-DETR
            ('gentle', 'standard', 'aggressive_denoise', 'color_only').

    Returns:
        Preprocessed numpy array in RGB format, ready for RF-DETR.

    Raises:
        ValueError: If ``preset`` is not a key of ``RFDETR_PRESETS``.

    Example:
        >>> img = preprocess_for_rfdetr("samsung.png", preset="standard")
        >>> results = rfdetr_model.predict(img, threshold=0.35)
    """
    if preset not in RFDETR_PRESETS:
        raise ValueError(
            f"Unknown preset: {preset}. Available: {list(RFDETR_PRESETS.keys())}"
        )

    preprocessor = RFDETR_PRESETS[preset]
    return preprocessor.preprocess(image)


def compare_distributions(original: np.ndarray, preprocessed: np.ndarray) -> dict:
    """
    Compare pixel distributions before/after preprocessing.

    Useful for verifying that preprocessing doesn't distort distributions
    too much for RF-DETR's ImageNet normalization.

    Args:
        original: Original image array.
        preprocessed: Preprocessed image array.

    Returns:
        Dict with four entries:
        - "original" / "preprocessed": mean and std reduced over the first
          two axes, plus "mean_normalized" (the mean divided by 255).
        - "difference": mean and std deltas (preprocessed minus original)
          and the mean delta as a percentage of the original mean.
        - "imagenet_expected": the ImageNet mean/std constants defined on
          ``RFDETRPreprocessor``.
    """
    orig_mean = np.mean(original, axis=(0, 1))
    orig_std = np.std(original, axis=(0, 1))

    prep_mean = np.mean(preprocessed, axis=(0, 1))
    prep_std = np.std(preprocessed, axis=(0, 1))

    mean_delta = prep_mean - orig_mean

    return {
        "original": {
            "mean": orig_mean.tolist(),
            "std": orig_std.tolist(),
            "mean_normalized": (orig_mean / 255.0).tolist(),  # ImageNet scale
        },
        "preprocessed": {
            "mean": prep_mean.tolist(),
            "std": prep_std.tolist(),
            "mean_normalized": (prep_mean / 255.0).tolist(),
        },
        "difference": {
            "mean_delta": mean_delta.tolist(),
            "std_delta": (prep_std - orig_std).tolist(),
            # The small epsilon guards against an all-zero original channel.
            "mean_delta_pct": (mean_delta / (orig_mean + 1e-6) * 100).tolist(),
        },
        # Fresh lists so callers cannot mutate the class-level constants.
        "imagenet_expected": {
            "mean": list(RFDETRPreprocessor.IMAGENET_MEAN),
            "std": list(RFDETRPreprocessor.IMAGENET_STD)
        }
    }