File size: 10,509 Bytes
77da9e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
"""
RF-DETR Optimized Preprocessing

This module provides preprocessing specifically optimized for RF-DETR model.
Unlike generic preprocessing, this version preserves the pixel value distributions
expected by RF-DETR's ImageNet normalization (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]).

Key Principles:
1. Denoise to remove compression artifacts WITHOUT changing distributions
2. Color harmonization for cross-device consistency
3. PRESERVE global mean/std values for ImageNet normalization compatibility
4. Gentle adjustments only (no aggressive CLAHE or histogram equalization)

Differences from generic preprocessing:
- Generic: Aggressive normalization, CLAHE, brightness adjustment
- RF-DETR optimized: Gentle denoising, color balance, distribution-preserving
"""

import cv2
import numpy as np
from PIL import Image
from typing import Union, Tuple, Optional
from pathlib import Path


class RFDETRPreprocessor:
    """
    Preprocessing optimized specifically for RF-DETR model

    Focuses on:
    - Denoising compression artifacts
    - Cross-device color consistency
    - Preserving pixel value distributions for ImageNet normalization

    Images are handled as 8-bit RGB arrays: every stage that rewrites pixels
    rounds, clips to [0, 255] and returns ``uint8``.
    """

    # ImageNet normalization values used by RF-DETR
    # (kept for reference; this class does not apply them itself)
    IMAGENET_MEAN = [0.485, 0.456, 0.406]  # Expected by RF-DETR
    IMAGENET_STD = [0.229, 0.224, 0.225]   # Expected by RF-DETR

    # Fraction of the full gray-world correction applied by _balance_colors
    # (0 = no correction, 1 = full correction)
    COLOR_BALANCE_ALPHA = 0.5

    # Below this standard deviation an image/channel is treated as flat and
    # is not rescaled (avoids division by ~zero)
    _MIN_STD = 1e-6

    def __init__(
        self,
        denoise: bool = True,
        color_balance: bool = True,
        preserve_distribution: bool = True,
        denoise_strength: int = 5  # Gentle by default
    ):
        """
        Initialize RF-DETR optimized preprocessor

        Args:
            denoise: Remove JPEG/PNG compression artifacts
            color_balance: Balance colors for cross-device consistency
            preserve_distribution: Preserve mean/std for ImageNet norm
            denoise_strength: Denoising strength (1-10, lower=gentler)
        """
        self.denoise = denoise
        self.color_balance = color_balance
        self.preserve_distribution = preserve_distribution
        self.denoise_strength = denoise_strength

    def preprocess(self, image: Union[str, Path, np.ndarray, Image.Image]) -> np.ndarray:
        """
        Apply RF-DETR optimized preprocessing

        Pipeline: load -> optional denoise -> optional color balance ->
        optional restoration of the input's mean/std.

        Args:
            image: Input image (path, PIL, or numpy array)

        Returns:
            Preprocessed numpy array in RGB format, ready for RF-DETR

        Raises:
            ValueError: If the input type or array shape is not supported
        """
        img_array = self._load_image(image)

        if self.preserve_distribution:
            # Color balancing works by shifting the per-channel means. Putting
            # the per-channel statistics back afterwards would invert that
            # correction exactly, so when balancing is enabled only the
            # *global* brightness/contrast is preserved. Without balancing,
            # the stricter per-channel statistics are preserved.
            stat_axes = None if self.color_balance else (0, 1)
            original_mean = np.mean(img_array, axis=stat_axes)
            original_std = np.std(img_array, axis=stat_axes)

        # 1. Gentle denoising
        if self.denoise:
            img_array = self._gentle_denoise(img_array)

        # 2. Color balance for cross-device consistency
        if self.color_balance:
            img_array = self._balance_colors(img_array)

        # 3. Restore the statistics captured before any pixel was modified
        if self.preserve_distribution:
            img_array = self._restore_distribution(
                img_array,
                original_mean,
                original_std
            )

        return img_array

    @staticmethod
    def _to_uint8(values: np.ndarray) -> np.ndarray:
        """
        Round to the nearest integer, clip to [0, 255] and cast to uint8

        Rounding first matters: a bare ``astype(np.uint8)`` truncates, which
        darkens the image by about half a gray level on average.
        """
        return np.clip(np.rint(values), 0, 255).astype(np.uint8)

    def _load_image(self, image: Union[str, Path, np.ndarray, Image.Image]) -> np.ndarray:
        """
        Load image from various formats

        Args:
            image: File path, PIL image, or numpy array shaped (H, W),
                (H, W, 1), (H, W, 3) or (H, W, 4). Arrays are taken to be
                gray / RGB / RGBA respectively; an alpha channel is dropped.

        Returns:
            A new (H, W, 3) array; the caller's array is never aliased

        Raises:
            ValueError: If the type or the array shape is not supported
        """
        if isinstance(image, np.ndarray):
            if image.ndim == 2:
                # Gray -> RGB: replicate the single plane into three channels
                return np.stack((image, image, image), axis=-1)
            if image.ndim == 3:
                channels = image.shape[2]
                if channels == 1:
                    return np.repeat(image, 3, axis=2)
                if channels == 3:
                    return image.copy()
                if channels == 4:
                    return image[:, :, :3].copy()
            # Previously this case fell through and returned None, which only
            # surfaced later as an unrelated-looking error.
            raise ValueError(
                f"Unsupported image array shape: {image.shape}; "
                "expected (H, W), (H, W, 1), (H, W, 3) or (H, W, 4)"
            )

        if isinstance(image, (str, Path)):
            # Context manager releases the file handle once the pixels have
            # been copied out
            with Image.open(image) as pil_image:
                return np.array(pil_image.convert('RGB'))

        if isinstance(image, Image.Image):
            return np.array(image.convert('RGB'))

        raise ValueError(f"Unsupported image type: {type(image)}")

    def _gentle_denoise(self, img: np.ndarray) -> np.ndarray:
        """
        Gentle denoising that removes compression artifacts
        WITHOUT significantly changing pixel distributions

        Uses an edge-preserving bilateral filter. ``denoise_strength`` is
        used as the filter diameter, and ten times that value as both sigmas.

        Args:
            img: RGB image array

        Returns:
            Denoised RGB image array
        """
        # Convert RGB to BGR for OpenCV
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        strength = self.denoise_strength
        denoised = cv2.bilateralFilter(
            img_bgr,
            d=strength,  # Diameter
            sigmaColor=strength * 10,
            sigmaSpace=strength * 10
        )

        # Convert back to RGB
        return cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB)

    def _balance_colors(self, img: np.ndarray) -> np.ndarray:
        """
        Balance colors for cross-device consistency

        Uses gray world assumption: average color should be gray.
        This reduces impact of different color profiles (Samsung vivid vs Pixel neutral)
        while preserving overall brightness and contrast.

        Only ``COLOR_BALANCE_ALPHA`` (50%) of the full correction is applied
        so the original look is largely kept.

        Args:
            img: (H, W, 3) RGB image array

        Returns:
            Color-balanced uint8 RGB image array
        """
        channel_means = np.mean(img, axis=(0, 1), dtype=np.float64)
        gray_avg = channel_means.sum() / 3.0
        alpha = self.COLOR_BALANCE_ALPHA

        # Gain per channel; an all-zero channel has nothing to scale and
        # keeps gain 1 (also avoids dividing by zero)
        gains = np.ones_like(channel_means)
        nonzero = channel_means > 0
        gains[nonzero] = 1 - alpha + alpha * gray_avg / channel_means[nonzero]

        balanced = img.astype(np.float32) * gains.astype(np.float32)
        return self._to_uint8(balanced)

    def _restore_distribution(
        self,
        img: np.ndarray,
        target_mean: Union[float, np.ndarray],
        target_std: Union[float, np.ndarray]
    ) -> np.ndarray:
        """
        Restore original mean/std distribution

        Applies the affine map that moves the current mean/std of ``img``
        onto the targets.

        Args:
            img: (H, W, 3) RGB image array
            target_mean: Mean to restore. A length-3 array restores each
                channel independently; a scalar restores the statistics of
                the image as a whole with one shared gain/offset, leaving the
                relationship between channels untouched.
            target_std: Standard deviation to restore, same form as
                ``target_mean``

        Returns:
            uint8 RGB image array with the requested statistics (up to
            rounding and clipping to [0, 255])
        """
        img_float = img.astype(np.float32)
        target_mean = np.asarray(target_mean, dtype=np.float64)
        target_std = np.asarray(target_std, dtype=np.float64)

        # Scalar targets -> statistics over the whole image;
        # per-channel targets -> statistics over the spatial axes only
        stat_axes = (0, 1) if target_mean.ndim else None
        current_mean = img_float.mean(axis=stat_axes, dtype=np.float64)
        current_std = img_float.std(axis=stat_axes, dtype=np.float64)

        # Flat channels (std ~ 0) cannot be rescaled; they are left as-is
        rescalable = current_std > self._MIN_STD
        safe_std = np.where(rescalable, current_std, 1.0)
        scale = np.where(rescalable, target_std / safe_std, 1.0)
        offset = np.where(rescalable, target_mean - current_mean * scale, 0.0)

        restored = img_float * scale.astype(np.float32) + offset.astype(np.float32)
        return self._to_uint8(restored)


# Ready-made RF-DETR preprocessors, keyed by preset name.
# Every preset keeps preserve_distribution=True; they differ only in whether
# denoising / color balancing run and in how strong the denoising is.
RFDETR_PRESETS = {
    preset_name: RFDETRPreprocessor(
        denoise=use_denoise,
        color_balance=use_color_balance,
        preserve_distribution=True,
        denoise_strength=strength,
    )
    for preset_name, use_denoise, use_color_balance, strength in (
        ("gentle", True, False, 3),             # very gentle denoise only
        ("standard", True, True, 5),            # moderate denoise + color balance
        ("aggressive_denoise", True, True, 8),  # strong denoise + color balance
        ("color_only", False, True, 0),         # color balance only
    )
}


def preprocess_for_rfdetr(
    image: Union[str, Path, np.ndarray, Image.Image],
    preset: str = "standard"
) -> np.ndarray:
    """
    Run one of the named RF-DETR preprocessing presets on an image

    Looks the preset up in ``RFDETR_PRESETS`` and delegates to that
    preprocessor's ``preprocess`` method.

    Args:
        image: Input image
        preset: Name of the preset to use
                ('gentle', 'standard', 'aggressive_denoise', 'color_only')

    Returns:
        Whatever the selected preprocessor's ``preprocess`` returns
        (a preprocessed numpy array in RGB format)

    Raises:
        ValueError: If ``preset`` is not a key of ``RFDETR_PRESETS``

    Example:
        >>> img = preprocess_for_rfdetr("samsung.png", preset="standard")
        >>> results = rfdetr_model.predict(img, threshold=0.35)
    """
    selected = RFDETR_PRESETS.get(preset)
    if selected is None:
        raise ValueError(
            f"Unknown preset: {preset}. Available: {list(RFDETR_PRESETS.keys())}"
        )
    return selected.preprocess(image)


def compare_distributions(original: np.ndarray, preprocessed: np.ndarray) -> dict:
    """
    Summarize how preprocessing changed an image's pixel statistics

    Mean and standard deviation are computed over axes 0 and 1 of each
    array, then reported side by side together with their differences.

    Args:
        original: Image before preprocessing
        preprocessed: Image after preprocessing

    Returns:
        Dict with four entries:
        - "original" / "preprocessed": "mean", "std", and "mean_normalized"
          (mean divided by 255)
        - "difference": "mean_delta", "std_delta" and "mean_delta_pct"
          (preprocessed minus original; the percentage is relative to the
          original mean)
        - "imagenet_expected": the reference ImageNet "mean" and "std"
    """
    def _mean_and_std(pixels):
        # Reduce over the first two axes
        return np.mean(pixels, axis=(0, 1)), np.std(pixels, axis=(0, 1))

    mean_before, std_before = _mean_and_std(original)
    mean_after, std_after = _mean_and_std(preprocessed)
    mean_shift = mean_after - mean_before

    report = {}
    report["original"] = {
        "mean": mean_before.tolist(),
        "std": std_before.tolist(),
        "mean_normalized": (mean_before / 255.0).tolist(),  # ImageNet scale
    }
    report["preprocessed"] = {
        "mean": mean_after.tolist(),
        "std": std_after.tolist(),
        "mean_normalized": (mean_after / 255.0).tolist(),
    }
    report["difference"] = {
        "mean_delta": mean_shift.tolist(),
        "std_delta": (std_after - std_before).tolist(),
        # The small epsilon keeps an all-zero channel from dividing by zero
        "mean_delta_pct": (mean_shift / (mean_before + 1e-6) * 100).tolist(),
    }
    report["imagenet_expected"] = {
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225]
    }
    return report