Spaces:
Sleeping
Sleeping
| """ | |
| Image Preprocessing - Screenshot Standardization | |
| This module provides preprocessing functions to normalize screenshots from | |
| different devices (Samsung, Pixel, Oppo, etc.) to ensure consistent detection | |
| results regardless of device manufacturer. | |
| Key Issues Addressed: | |
| - Different color profiles (Samsung vivid vs Pixel neutral) | |
| - Variable contrast and brightness | |
| - Different compression levels | |
| - Screen calibration differences | |
| Preprocessing Pipeline: | |
| 1. Color space normalization (sRGB standard) | |
| 2. Contrast and brightness normalization | |
| 3. Resolution standardization (optional) | |
| 4. Denoising (removes JPEG artifacts) | |
| 5. Sharpness enhancement (optional) | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from PIL import Image | |
| from typing import Union, Tuple, Optional | |
| from pathlib import Path | |
class ImagePreprocessor:
    """
    Preprocessor for standardizing screenshots from different devices.

    Applies (in order) denoising, color normalization, exposure
    normalization, CLAHE, optional sharpening, and optional resizing so
    that screenshots captured on devices with different color profiles,
    brightness settings, and compression levels produce comparable images.
    """

    def __init__(
        self,
        target_colorspace: str = "srgb",
        normalize_contrast: bool = True,
        normalize_brightness: bool = True,
        denoise: bool = True,
        target_size: Optional[Tuple[int, int]] = None,
        enhance_sharpness: bool = False,
        clahe_enabled: bool = True
    ):
        """
        Initialize image preprocessor.

        Args:
            target_colorspace: Target color space ('srgb' or 'lab'; any
                other value skips color normalization)
            normalize_contrast: Enable contrast normalization
            normalize_brightness: Enable brightness normalization
            denoise: Remove JPEG/PNG artifacts
            target_size: Optional (width, height) for resizing
            enhance_sharpness: Enhance image sharpness (for blurry screenshots)
            clahe_enabled: Use CLAHE for adaptive contrast enhancement
        """
        self.target_colorspace = target_colorspace
        self.normalize_contrast = normalize_contrast
        self.normalize_brightness = normalize_brightness
        self.denoise = denoise
        self.target_size = target_size
        self.enhance_sharpness = enhance_sharpness
        self.clahe_enabled = clahe_enabled

    def preprocess(self, image: Union[str, Path, np.ndarray, Image.Image]) -> np.ndarray:
        """
        Apply full preprocessing pipeline.

        Args:
            image: Input image (path, PIL, or numpy array)

        Returns:
            Preprocessed numpy array in RGB format
        """
        # Load image
        img_array = self._load_image(image)

        # 1. Denoise first so later contrast steps don't amplify artifacts
        if self.denoise:
            img_array = self._denoise_image(img_array)

        # 2. Color space normalization
        img_array = self._normalize_colors(img_array)

        # 3. Contrast and brightness normalization
        if self.normalize_contrast or self.normalize_brightness:
            img_array = self._normalize_exposure(img_array)

        # 4. CLAHE (Contrast Limited Adaptive Histogram Equalization)
        if self.clahe_enabled:
            img_array = self._apply_clahe(img_array)

        # 5. Sharpness enhancement (optional)
        if self.enhance_sharpness:
            img_array = self._enhance_sharpness(img_array)

        # 6. Resize (optional)
        if self.target_size:
            img_array = self._resize_image(img_array, self.target_size)

        return img_array

    def _load_image(self, image: Union[str, Path, np.ndarray, Image.Image]) -> np.ndarray:
        """
        Load image from various formats and return an RGB numpy array.

        Raises:
            ValueError: For unsupported input types or array shapes.
        """
        if isinstance(image, (str, Path)):
            pil_image = Image.open(image).convert('RGB')
            return np.array(pil_image)
        elif isinstance(image, Image.Image):
            return np.array(image.convert('RGB'))
        elif isinstance(image, np.ndarray):
            if image.ndim == 2:
                # Grayscale -> RGB
                return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            elif image.ndim == 3 and image.shape[2] == 4:
                # Drop alpha channel
                return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
            elif image.ndim == 3 and image.shape[2] == 3:
                return image
            # BUG FIX: previously a 3-D array with an unexpected channel
            # count (e.g. 2 channels) fell through and returned None.
            raise ValueError(f"Unsupported array shape: {image.shape}")
        else:
            raise ValueError(f"Unsupported image type: {type(image)}")

    def _denoise_image(self, img: np.ndarray) -> np.ndarray:
        """
        Remove compression artifacts and noise.

        Uses fastNlMeansDenoisingColored which is effective for:
        - JPEG compression artifacts
        - PNG compression noise
        - Sensor noise from screenshots
        """
        # Convert RGB to BGR for OpenCV
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        # Apply denoising (h=10 is good for screenshots)
        denoised = cv2.fastNlMeansDenoisingColored(
            img_bgr,
            None,
            h=10,          # Filter strength for luminance
            hColor=10,     # Filter strength for color
            templateWindowSize=7,
            searchWindowSize=21
        )
        # Convert back to RGB
        return cv2.cvtColor(denoised, cv2.COLOR_BGR2RGB)

    def _normalize_colors(self, img: np.ndarray) -> np.ndarray:
        """
        Normalize color distribution to standard sRGB.

        This reduces the impact of:
        - Samsung's "Vivid" mode (oversaturated colors)
        - Different color temperature settings
        - Display calibration differences

        Note: only 'srgb' and 'lab' are handled; any other
        target_colorspace returns the image unchanged.
        """
        if self.target_colorspace == "srgb":
            # Simple normalization: stretch global min/max to [0, 255]
            img_normalized = cv2.normalize(
                img,
                None,
                alpha=0,
                beta=255,
                norm_type=cv2.NORM_MINMAX,
                dtype=cv2.CV_8U
            )
            return img_normalized
        elif self.target_colorspace == "lab":
            # Convert to LAB for perceptual uniformity
            img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
            img_lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
            # Normalize L channel (lightness) only; keep chroma untouched
            l, a, b = cv2.split(img_lab)
            l = cv2.normalize(l, None, 0, 255, cv2.NORM_MINMAX)
            img_lab = cv2.merge([l, a, b])
            img_bgr = cv2.cvtColor(img_lab, cv2.COLOR_LAB2BGR)
            return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
        return img

    def _normalize_exposure(self, img: np.ndarray) -> np.ndarray:
        """
        Normalize brightness and/or contrast on the LAB lightness channel.

        Reduces impact of:
        - Different screen brightness settings
        - Auto-brightness variations
        - Ambient light conditions during capture
        """
        # Convert to LAB color space so only lightness is adjusted
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        img_lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(img_lab)

        target_mean = 128  # middle gray
        target_std = 50
        l_mean = np.mean(l)
        l_std = np.std(l)

        if self.normalize_brightness:
            # Full normalization: recenter mean to 128 and rescale spread
            # to 50 (identical to the original brightness behavior).
            l = ((l - l_mean) / (l_std + 1e-6)) * target_std + target_mean
            l = np.clip(l, 0, 255).astype(np.uint8)
        elif self.normalize_contrast:
            # BUG FIX: contrast-only mode was previously a silent no-op.
            # Rescale the spread to the target while keeping the image's
            # own mean brightness.
            l = ((l - l_mean) / (l_std + 1e-6)) * target_std + l_mean
            l = np.clip(l, 0, 255).astype(np.uint8)

        # Merge and convert back
        img_lab = cv2.merge([l, a, b])
        img_bgr = cv2.cvtColor(img_lab, cv2.COLOR_LAB2BGR)
        return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    def _apply_clahe(self, img: np.ndarray) -> np.ndarray:
        """
        Apply CLAHE (Contrast Limited Adaptive Histogram Equalization).

        Benefits:
        - Improves local contrast
        - Makes text more readable
        - Helps with dark/light UI elements
        - Preserves overall appearance
        """
        # Convert to LAB
        img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        img_lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(img_lab)
        # Apply CLAHE to L channel only
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        # Merge and convert back
        img_lab = cv2.merge([l, a, b])
        img_bgr = cv2.cvtColor(img_lab, cv2.COLOR_LAB2BGR)
        return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    def _enhance_sharpness(self, img: np.ndarray) -> np.ndarray:
        """
        Enhance image sharpness via unsharp masking.

        Useful for:
        - Blurry screenshots
        - Low-resolution captures
        - Improving OCR accuracy
        """
        # Unsharp mask: subtract a Gaussian-blurred copy
        gaussian = cv2.GaussianBlur(img, (0, 0), 2.0)
        sharpened = cv2.addWeighted(img, 1.5, gaussian, -0.5, 0)
        return np.clip(sharpened, 0, 255).astype(np.uint8)

    def _resize_image(self, img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
        """
        Resize image to target size using Lanczos interpolation.

        Args:
            img: Input image
            target_size: (width, height)
        """
        return cv2.resize(img, target_size, interpolation=cv2.INTER_LANCZOS4)
# Preset configurations for different use cases. Each entry maps a preset
# name (accepted by preprocess_screenshot) to the keyword arguments used to
# build its ImagePreprocessor.
_PRESET_KWARGS = {
    "standard": {
        "normalize_contrast": True,
        "normalize_brightness": True,
        "denoise": True,
        "clahe_enabled": True,
        "enhance_sharpness": False,
    },
    "aggressive": {
        "normalize_contrast": True,
        "normalize_brightness": True,
        "denoise": True,
        "clahe_enabled": True,
        "enhance_sharpness": True,
    },
    "minimal": {
        "normalize_contrast": False,
        "normalize_brightness": True,
        "denoise": True,
        "clahe_enabled": False,
        "enhance_sharpness": False,
    },
    "ocr_optimized": {
        "normalize_contrast": True,
        "normalize_brightness": True,
        "denoise": True,
        "clahe_enabled": True,
        "enhance_sharpness": True,  # Sharp text helps OCR
    },
}

PRESETS = {
    name: ImagePreprocessor(**kwargs) for name, kwargs in _PRESET_KWARGS.items()
}
def preprocess_screenshot(
    image: Union[str, Path, np.ndarray, Image.Image],
    preset: str = "standard"
) -> np.ndarray:
    """
    Convenience wrapper: preprocess a screenshot with a named preset.

    Args:
        image: Input image (path, PIL image, or numpy array)
        preset: Preprocessing preset ('standard', 'aggressive', 'minimal', 'ocr_optimized')

    Returns:
        Preprocessed numpy array in RGB format

    Raises:
        ValueError: If ``preset`` is not a known preset name.

    Example:
        >>> img = preprocess_screenshot("samsung_screenshot.png", preset="standard")
        >>> results = detector.analyze(img)
    """
    preprocessor = PRESETS.get(preset)
    if preprocessor is None:
        raise ValueError(f"Unknown preset: {preset}. Available: {list(PRESETS.keys())}")
    return preprocessor.preprocess(image)