# Source: CU1-X / detection/rfdetr_preprocessing.py
# Uploaded by AI-DrivenTesting (commit 77da9e2, "init")
"""
RF-DETR Optimized Preprocessing
This module provides preprocessing specifically optimized for RF-DETR model.
Unlike generic preprocessing, this version preserves the pixel value distributions
expected by RF-DETR's ImageNet normalization (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).
Key Principles:
1. Denoise to remove compression artifacts WITHOUT changing distributions
2. Color harmonization for cross-device consistency
3. PRESERVE global mean/std values for ImageNet normalization compatibility
4. Gentle adjustments only (no aggressive CLAHE or histogram equalization)
Differences from generic preprocessing:
- Generic: Aggressive normalization, CLAHE, brightness adjustment
- RF-DETR optimized: Gentle denoising, color balance, distribution-preserving
"""
import cv2
import numpy as np
from PIL import Image
from typing import Union, Tuple, Optional
from pathlib import Path
class RFDETRPreprocessor:
    """
    Preprocessing optimized specifically for the RF-DETR model.

    The pipeline has three optional stages, applied in this order:

    1. Bilateral-filter denoising (``denoise``).
    2. Partial gray-world color balance (``color_balance``).
    3. Re-matching each channel's mean/std to the values measured on the
       loaded input (``preserve_distribution``).

    The result is always a ``uint8`` RGB array when a stage that converts
    the dtype runs (color balance / distribution restore).
    """

    # ImageNet normalization values used by RF-DETR. They are kept here as
    # reference constants; none of the methods below read them.
    IMAGENET_MEAN = [0.485, 0.456, 0.406]
    IMAGENET_STD = [0.229, 0.224, 0.225]

    def __init__(
        self,
        denoise: bool = True,
        color_balance: bool = True,
        preserve_distribution: bool = True,
        denoise_strength: int = 5  # Gentle by default
    ) -> None:
        """
        Initialize the RF-DETR optimized preprocessor.

        Args:
            denoise: Remove JPEG/PNG compression artifacts.
            color_balance: Balance colors for cross-device consistency.
            preserve_distribution: Restore the input's per-channel mean/std
                after the other stages have run.
            denoise_strength: Denoising strength (1-10, lower=gentler). Used
                as the bilateral filter diameter and, times 10, as both of
                its sigma values.
        """
        self.denoise = denoise
        self.color_balance = color_balance
        self.preserve_distribution = preserve_distribution
        self.denoise_strength = denoise_strength

    def preprocess(
        self, image: Union[str, Path, np.ndarray, "Image.Image"]
    ) -> np.ndarray:
        """
        Apply RF-DETR optimized preprocessing.

        Args:
            image: Input image (path, PIL image, or numpy array).

        Returns:
            Preprocessed numpy array in RGB format, ready for RF-DETR.

        Raises:
            ValueError: If ``image`` has an unsupported type or array shape.
        """
        img_array = self._load_image(image)

        # Capture the statistics of the untouched input so they can be
        # re-imposed after the filtering stages.
        if self.preserve_distribution:
            original_mean = np.mean(img_array, axis=(0, 1))
            original_std = np.std(img_array, axis=(0, 1))

        # 1. Gentle denoising
        if self.denoise:
            img_array = self._gentle_denoise(img_array)

        # 2. Color balance for cross-device consistency
        if self.color_balance:
            img_array = self._balance_colors(img_array)

        # 3. Restore original distribution if requested.
        # NOTE(review): _balance_colors applies a per-channel gain and
        # _restore_distribution re-matches per-channel mean/std, so apart
        # from clipping/rounding this step largely cancels the color balance
        # when both options are enabled - confirm this is intended.
        if self.preserve_distribution:
            img_array = self._restore_distribution(
                img_array,
                original_mean,
                original_std
            )

        return img_array

    def _load_image(
        self, image: Union[str, Path, np.ndarray, "Image.Image"]
    ) -> np.ndarray:
        """
        Load an image from one of the supported input kinds as an RGB array.

        Args:
            image: A numpy array shaped (H, W), (H, W, 3) or (H, W, 4), a
                filesystem path (``str`` / ``Path``), or a PIL image.
                3-channel arrays are copied as-is (presumably already in RGB
                order - verify against callers).

        Returns:
            A new 3-channel array; the caller's array is never returned
            directly.

        Raises:
            ValueError: For unsupported input types, or for arrays whose
                shape is not one of the three listed above.
        """
        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            if image.ndim == 3:
                channels = image.shape[2]
                if channels == 4:
                    return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
                if channels == 3:
                    return image.copy()
            # Previously this fell through and returned None (or raised
            # IndexError for 1-D input); fail loudly instead.
            raise ValueError(
                f"Unsupported image array shape: {image.shape}; "
                "expected (H, W), (H, W, 3) or (H, W, 4)"
            )

        if isinstance(image, (str, Path)):
            pil_image = Image.open(image).convert('RGB')
            return np.array(pil_image)

        if isinstance(image, Image.Image):
            return np.array(image.convert('RGB'))

        raise ValueError(f"Unsupported image type: {type(image)}")

    def _gentle_denoise(self, img: np.ndarray) -> np.ndarray:
        """
        Denoise with an edge-preserving bilateral filter.

        The image is converted RGB -> BGR for OpenCV, filtered, and converted
        back to RGB.

        Args:
            img: RGB image array.

        Returns:
            The filtered RGB image.
        """
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        # The single strength knob drives the neighborhood diameter and both
        # sigma values (scaled by 10).
        denoised = cv2.bilateralFilter(
            img_bgr,
            d=self.denoise_strength,
            sigmaColor=self.denoise_strength * 10,
            sigmaSpace=self.denoise_strength * 10
        )

        return cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB)

    def _balance_colors(self, img: np.ndarray) -> np.ndarray:
        """
        Balance colors using a half-strength gray-world correction.

        Gray-world assumes the average color should be gray, so each channel
        is scaled toward the mean of the three channel means. Only 50% of the
        full correction is applied to preserve the original look.

        Args:
            img: RGB image array with three channels on the last axis.

        Returns:
            A new ``uint8`` array, clipped to [0, 255].
        """
        # Per-channel means and their average (the gray-world target).
        channel_means = [np.mean(img[:, :, c]) for c in range(3)]
        gray_avg = (
            channel_means[0] + channel_means[1] + channel_means[2]
        ) / 3.0

        alpha = 0.5  # 50% correction
        img_balanced = img.copy().astype(np.float32)

        for c, channel_mean in enumerate(channel_means):
            # An all-zero channel has no meaningful gain; leave it untouched.
            if channel_mean > 0:
                gain = 1 - alpha + alpha * gray_avg / channel_mean
                img_balanced[:, :, c] = img_balanced[:, :, c] * gain

        # Clip to valid range before dropping back to 8-bit.
        return np.clip(img_balanced, 0, 255).astype(np.uint8)

    def _restore_distribution(
        self,
        img: np.ndarray,
        target_mean: np.ndarray,
        target_std: np.ndarray
    ) -> np.ndarray:
        """
        Re-impose a target per-channel mean/std on an image.

        Each channel is standardized to zero mean / unit std and then scaled
        and shifted to the target statistics. Channels whose current std is
        at most 1e-6 are left unchanged.

        Args:
            img: Image array with three channels on the last axis.
            target_mean: Per-channel target means (indexable by 0..2).
            target_std: Per-channel target standard deviations.

        Returns:
            A new ``uint8`` array, clipped to [0, 255].
        """
        img_float = img.astype(np.float32)

        # Current statistics of the processed image.
        current_mean = np.mean(img_float, axis=(0, 1))
        current_std = np.std(img_float, axis=(0, 1))

        for c in range(3):
            if current_std[c] > 1e-6:  # Avoid division by zero
                # Standardize to zero mean, unit std
                img_float[:, :, c] = (
                    img_float[:, :, c] - current_mean[c]
                ) / current_std[c]
                # Map onto the target distribution
                img_float[:, :, c] = (
                    img_float[:, :, c] * target_std[c] + target_mean[c]
                )

        # Clip to valid range
        return np.clip(img_float, 0, 255).astype(np.uint8)
# Ready-made preprocessor configurations for RF-DETR, keyed by preset name.
# Each row below is (name, denoise, color_balance, denoise_strength); every
# preset keeps preserve_distribution enabled. The instances are built once at
# import time and shared by all callers.
RFDETR_PRESETS = {
    preset_name: RFDETRPreprocessor(
        denoise=use_denoise,
        color_balance=use_color_balance,
        preserve_distribution=True,
        denoise_strength=strength,
    )
    for preset_name, use_denoise, use_color_balance, strength in (
        ("gentle", True, False, 3),              # Very gentle, denoise only
        ("standard", True, True, 5),             # Moderate
        ("aggressive_denoise", True, True, 8),   # Strong denoising
        ("color_only", False, True, 0),          # Color balance only
    )
}
def preprocess_for_rfdetr(
    image: Union[str, Path, np.ndarray, Image.Image],
    preset: str = "standard"
) -> np.ndarray:
    """
    Run one of the named RF-DETR preprocessing presets on an image.

    Looks the preset up in ``RFDETR_PRESETS`` and delegates to that
    preprocessor's ``preprocess`` method.

    Args:
        image: Input image (anything the preset's ``preprocess`` accepts).
        preset: Name of the preset to use
            ('gentle', 'standard', 'aggressive_denoise', 'color_only').

    Returns:
        Preprocessed numpy array in RGB format, ready for RF-DETR.

    Raises:
        ValueError: If ``preset`` is not a key of ``RFDETR_PRESETS``.

    Example:
        >>> img = preprocess_for_rfdetr("samsung.png", preset="standard")
        >>> results = rfdetr_model.predict(img, threshold=0.35)
    """
    chosen = RFDETR_PRESETS.get(preset)

    # Guard clause: reject unknown names and list the valid ones.
    if chosen is None:
        raise ValueError(
            f"Unknown preset: {preset}. Available: {list(RFDETR_PRESETS.keys())}"
        )

    return chosen.preprocess(image)
def compare_distributions(original: np.ndarray, preprocessed: np.ndarray) -> dict:
    """
    Summarize how preprocessing changed an image's pixel statistics.

    Mean and standard deviation are reduced over the first two axes of each
    array, so for (H, W, C) images the statistics are per channel.

    Args:
        original: Image before preprocessing.
        preprocessed: Image after preprocessing.

    Returns:
        Dict with four entries:
            - "original" / "preprocessed": "mean", "std" and
              "mean_normalized" (mean divided by 255).
            - "difference": "mean_delta", "std_delta" and "mean_delta_pct"
              (mean change relative to the original mean, in percent).
            - "imagenet_expected": the ImageNet mean/std reference values.
    """
    def _mean_and_std(pixels):
        # Reduce over the two leading (spatial) axes.
        return np.mean(pixels, axis=(0, 1)), np.std(pixels, axis=(0, 1))

    mean_before, std_before = _mean_and_std(original)
    mean_after, std_after = _mean_and_std(preprocessed)

    mean_shift = mean_after - mean_before
    std_shift = std_after - std_before

    report = {}
    report["original"] = {
        "mean": mean_before.tolist(),
        "std": std_before.tolist(),
        "mean_normalized": (mean_before / 255.0).tolist(),  # ImageNet scale
    }
    report["preprocessed"] = {
        "mean": mean_after.tolist(),
        "std": std_after.tolist(),
        "mean_normalized": (mean_after / 255.0).tolist(),
    }
    report["difference"] = {
        "mean_delta": mean_shift.tolist(),
        "std_delta": std_shift.tolist(),
        # The small epsilon keeps the ratio finite for an all-zero channel.
        "mean_delta_pct": (mean_shift / (mean_before + 1e-6) * 100).tolist(),
    }
    report["imagenet_expected"] = {
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225],
    }
    return report