Spaces:
Runtime error
Runtime error
| """ | |
| Image utility functions for preprocessing | |
| Provides functions for resizing, normalization, and quality validation | |
| of card images. | |
| """ | |
| import cv2 | |
| import numpy as np | |
| from typing import Dict, Tuple | |
| from ..utils.logger import get_logger | |
| logger = get_logger(__name__) | |
def resize_image(image: np.ndarray, size: int = 256) -> np.ndarray:
    """
    Scale an image to a square of ``size`` × ``size`` pixels.

    Both axes are forced to ``size``, so the source aspect ratio is not
    preserved. ``cv2.INTER_AREA`` is selected when either source dimension
    exceeds ``size``; otherwise ``cv2.INTER_CUBIC`` is used.

    Args:
        image: Input image; its first two axes are read as (height, width).
        size: Edge length of the output square in pixels (default: 256).

    Returns:
        The resized image produced by ``cv2.resize``.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided to resize_image")

    src_h, src_w = image.shape[:2]

    # A single axis larger than the target is enough to treat the
    # operation as shrinking and pick the area-based interpolation.
    shrinking = src_h > size or src_w > size
    method = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC

    output = cv2.resize(image, (size, size), interpolation=method)
    logger.debug(f"Resized image from {src_w}×{src_h} to {size}×{size}")
    return output
def normalize_pixels(image: np.ndarray) -> np.ndarray:
    """
    Scale pixel values by 1/255 and return them as float32.

    For a uint8 input (0-255) the result lies in [0.0, 1.0]. No clipping
    is performed, so inputs outside 0-255 map outside that range.

    Args:
        image: Input image (expected uint8).

    Returns:
        A float32 array holding ``image / 255``.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided to normalize_pixels")

    as_float = image.astype(np.float32)
    normalized = as_float / 255.0

    lowest, highest = normalized.min(), normalized.max()
    logger.debug(f"Normalized image: min={lowest:.3f}, max={highest:.3f}")
    return normalized
def denormalize_pixels(image: np.ndarray) -> np.ndarray:
    """
    Convert a normalized image from [0, 1] back to uint8 in [0, 255].

    Values are scaled by 255, rounded to the nearest integer, clipped to
    [0, 255] and cast to uint8. Rounding (rather than truncating) keeps a
    normalize -> denormalize round trip lossless even when float arithmetic
    leaves a value slightly below its integer.

    Args:
        image: Normalized image (float values nominally in [0, 1]).
            Out-of-range values are clipped.

    Returns:
        Denormalized image (uint8 in range [0, 255]).

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided to denormalize_pixels")

    # Round before the cast: astype(uint8) truncates toward zero, which
    # would map e.g. 127.5 to 127 and 2.9999998 to 2.
    scaled = np.rint(image * 255.0)
    return np.clip(scaled, 0, 255).astype(np.uint8)
def check_image_quality(
    image: np.ndarray,
    blur_threshold: float = 100.0,
    brightness_range: Tuple[float, float] = (30.0, 225.0),
    contrast_threshold: float = 30.0
) -> Dict[str, float]:
    """
    Compute blur, brightness and contrast metrics for an image.

    The image is reduced to a single channel, then three statistics are
    measured and compared against the supplied thresholds. A warning is
    logged when any check fails.

    Args:
        image: Input image (expected uint8). May be 2-D grayscale,
            H×W×1, or a multi-channel image in BGR channel order.
        blur_threshold: Minimum Laplacian variance for the image to count
            as sharp.
        brightness_range: Inclusive (min, max) range for the mean pixel value.
        contrast_threshold: Minimum pixel standard deviation.

    Returns:
        Dictionary with quality metrics:
            - blur_score: Laplacian variance (higher = sharper)
            - brightness: Mean grayscale pixel value
            - contrast: Standard deviation of grayscale pixel values
            - is_acceptable: builtin ``bool``, True when all checks pass

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided to check_image_quality")

    # Reduce to a single channel for analysis.
    if image.ndim == 3:
        if image.shape[2] == 1:
            # H×W×1 is already grayscale; cvtColor would reject it.
            gray = image[:, :, 0]
        else:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # 1. Blur: variance of the Laplacian (sharper edges -> higher variance).
    blur_score = float(cv2.Laplacian(gray, cv2.CV_64F).var())

    # 2. Brightness: mean pixel value.
    brightness = float(gray.mean())

    # 3. Contrast: standard deviation of pixel values.
    contrast = float(gray.std())

    # Cast to builtin bool: NumPy comparisons yield numpy.bool_, which is
    # not JSON-serializable and is inconsistent with the float casts above.
    is_acceptable = bool(
        blur_score >= blur_threshold
        and brightness_range[0] <= brightness <= brightness_range[1]
        and contrast >= contrast_threshold
    )

    quality_metrics = {
        'blur_score': blur_score,
        'brightness': brightness,
        'contrast': contrast,
        'is_acceptable': is_acceptable
    }

    if not is_acceptable:
        logger.warning(
            f"Image quality issues detected - "
            f"blur: {blur_score:.1f} (threshold: {blur_threshold}), "
            f"brightness: {brightness:.1f} (range: {brightness_range}), "
            f"contrast: {contrast:.1f} (threshold: {contrast_threshold})"
        )

    return quality_metrics
def adaptive_histogram_equalization(image: np.ndarray, clip_limit: float = 2.0) -> np.ndarray:
    """
    Enhance contrast with CLAHE applied to the lightness channel.

    The image is converted from BGR to LAB, CLAHE (8×8 tile grid) is run
    on the L channel only, and the result is converted back to BGR so the
    colour channels are left as they were.

    Args:
        image: Input image (BGR).
        clip_limit: Threshold for contrast limiting (default: 2.0).

    Returns:
        Contrast-enhanced BGR image.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided")

    # Work in LAB so that only lightness is equalized.
    lab_image = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
    lightness, chroma_a, chroma_b = cv2.split(lab_image)

    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
    merged = cv2.merge([equalizer.apply(lightness), chroma_a, chroma_b])

    result = cv2.cvtColor(merged, cv2.COLOR_LAB2BGR)
    logger.debug("Applied adaptive histogram equalization")
    return result
def remove_noise(image: np.ndarray, kernel_size: int = 5) -> np.ndarray:
    """
    Denoise an image with OpenCV's bilateral filter.

    The filter smooths the image while preserving edges. ``kernel_size``
    is passed as the filter's pixel-neighbourhood diameter; both sigma
    parameters are fixed at 75.

    Args:
        image: Input image.
        kernel_size: Filter kernel size (default: 5).

    Returns:
        Denoised image.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided")

    sigma = 75  # used for both the colour sigma and the spatial sigma
    smoothed = cv2.bilateralFilter(image, kernel_size, sigma, sigma)

    logger.debug(f"Applied bilateral filter with kernel size {kernel_size}")
    return smoothed
def auto_rotate_card(image: np.ndarray) -> Tuple[np.ndarray, float]:
    """
    Automatically detect and correct card rotation.

    Edges are found with Canny, straight lines with the standard Hough
    transform, and each line's angle is folded into a skew estimate in
    roughly [-45, 45] degrees. The median of those per-line estimates is
    used as the rotation. Skews smaller than 2 degrees are ignored.

    Args:
        image: Input card image, either BGR or 2-D grayscale.

    Returns:
        Tuple of (rotated_image, rotation_angle_degrees). When no lines are
        detected or the skew is below 2 degrees, the input image is
        returned unchanged together with 0.0.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided")

    # A 2-D input is already grayscale; cvtColor would reject it.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect edges, then straight lines via the Hough transform.
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 200)

    if lines is None or len(lines) == 0:
        logger.debug("No lines detected for rotation correction")
        return image, 0.0

    # Each Hough line is (rho, theta); only theta matters here.
    thetas_deg = np.degrees(lines.reshape(-1, 2)[:, 1])

    # Fold every line's angle BEFORE taking the median. A card yields
    # edges near 0, 90 and 180 degrees that all describe the same skew;
    # a median over the raw angles would mix those families into a
    # meaningless value (e.g. [1, 1, 91, 91] -> 46).
    folded = np.where(
        thetas_deg > 135,
        thetas_deg - 180,
        np.where(thetas_deg > 45, thetas_deg - 90, thetas_deg),
    )
    rotation_angle = float(np.median(folded))

    # Only rotate if the skew is significant (at least 2 degrees).
    if abs(rotation_angle) < 2:
        return image, 0.0

    # Rotate about the image centre, keeping the original canvas size.
    height, width = image.shape[:2]
    center = (width // 2, height // 2)
    rotation_matrix = cv2.getRotationMatrix2D(center, rotation_angle, 1.0)
    rotated = cv2.warpAffine(
        image,
        rotation_matrix,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE
    )

    logger.debug(f"Rotated image by {rotation_angle:.2f} degrees")
    return rotated, rotation_angle
def crop_to_content(image: np.ndarray, padding: int = 10) -> np.ndarray:
    """
    Crop image to its content, removing near-black borders.

    Pixels whose grayscale value is greater than 1 are treated as content.
    The bounding box of all such pixels is expanded by ``padding`` on each
    side (clamped to the image bounds) and the image is cropped to it.

    Args:
        image: Input image, either BGR or 2-D grayscale.
        padding: Pixels to add around detected content on every side
            (default: 10).

    Returns:
        Cropped image, or the input image unchanged when no content
        pixels are found.

    Raises:
        ValueError: If ``image`` is ``None`` or has zero elements.
    """
    if image is None or image.size == 0:
        raise ValueError("Empty or None image provided")

    # A 2-D input is already grayscale; cvtColor would reject it.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Threshold to find content (anything brighter than value 1).
    _, thresh = cv2.threshold(gray, 1, 255, cv2.THRESH_BINARY)

    coords = cv2.findNonZero(thresh)
    if coords is None:
        logger.warning("No content found in image")
        return image

    x, y, w, h = cv2.boundingRect(coords)
    height, width = image.shape[:2]

    # Pad each edge independently from the unclamped bounding box. Deriving
    # the far edge from the already-clamped origin would hand the right or
    # bottom side the padding that the left or top side could not use.
    left = max(0, x - padding)
    top = max(0, y - padding)
    right = min(width, x + w + padding)
    bottom = min(height, y + h + padding)

    cropped = image[top:bottom, left:right]

    w = right - left
    h = bottom - top
    logger.debug(f"Cropped to content: {w}×{h}")
    return cropped