""" Preprocessing functions to improve OCR accuracy Includes various image enhancement techniques """ import cv2 import numpy as np def convert_to_grayscale(img): """Convert image to grayscale""" if len(img.shape) == 3: return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) return img def apply_thresholding(img, method='otsu'): """ Apply thresholding to image Methods: - 'otsu': Otsu's automatic thresholding - 'adaptive': Adaptive thresholding - 'binary': Simple binary thresholding """ gray = convert_to_grayscale(img) if method == 'otsu': # Otsu's thresholding - automatic threshold selection _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) elif method == 'adaptive': # Adaptive thresholding - good for varying lighting thresh = cv2.adaptiveThreshold( gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2 ) elif method == 'binary': # Simple binary thresholding _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY) else: thresh = gray return thresh def remove_noise(img, method='median'): """ Remove noise from image Methods: - 'median': Median blur (good for salt-and-pepper noise) - 'gaussian': Gaussian blur (general smoothing) - 'bilateral': Bilateral filter (preserves edges) """ if method == 'median': return cv2.medianBlur(img, 3) elif method == 'gaussian': return cv2.GaussianBlur(img, (5, 5), 0) elif method == 'bilateral': return cv2.bilateralFilter(img, 9, 75, 75) return img def dilate_text(img, kernel_size=(1, 1)): """Dilate text to make it thicker""" kernel = np.ones(kernel_size, np.uint8) return cv2.dilate(img, kernel, iterations=1) def erode_text(img, kernel_size=(1, 1)): """Erode text to make it thinner""" kernel = np.ones(kernel_size, np.uint8) return cv2.erode(img, kernel, iterations=1) def invert_image(img): """Invert image colors (useful if text is white on black)""" return cv2.bitwise_not(img) def enhance_contrast(img): """Enhance image contrast using CLAHE""" gray = convert_to_grayscale(img) clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) return clahe.apply(gray) def resize_image(img, scale=2.0): """ Resize image for better OCR Larger images often work better with Tesseract """ height, width = img.shape[:2] new_width = int(width * scale) new_height = int(height * scale) return cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC) def add_border(img, border_size=10, color=255): """Add white border around image""" return cv2.copyMakeBorder( img, border_size, border_size, border_size, border_size, cv2.BORDER_CONSTANT, value=color ) def preprocess_pipeline(img, config='default'): """ Complete preprocessing pipeline Configs: - 'default': Standard preprocessing - 'aggressive': More aggressive preprocessing - 'light': Light preprocessing - 'custom': Custom pipeline """ if config == 'default': # Standard pipeline processed = convert_to_grayscale(img) processed = remove_noise(processed, 'median') processed = apply_thresholding(processed, 'otsu') processed = add_border(processed, 10) elif config == 'aggressive': # Aggressive preprocessing processed = convert_to_grayscale(img) processed = enhance_contrast(processed) processed = remove_noise(processed, 'bilateral') processed = apply_thresholding(processed, 'adaptive') processed = dilate_text(processed, (2, 2)) processed = add_border(processed, 15) elif config == 'light': # Light preprocessing processed = convert_to_grayscale(img) processed = apply_thresholding(processed, 'otsu') elif config == 'upscale': # Upscale and process processed = resize_image(img, scale=3.0) processed = convert_to_grayscale(processed) processed = remove_noise(processed, 'median') processed = apply_thresholding(processed, 'otsu') processed = add_border(processed, 20) else: # No preprocessing processed = img return processed def preprocess_for_ocr(img, show_steps=False): """ Optimized preprocessing for OCR Returns preprocessed image ready for Tesseract """ steps = {} # Step 1: Convert to grayscale gray = convert_to_grayscale(img) if show_steps: steps['1_grayscale'] = gray.copy() # Step 2: Upscale image (Tesseract works better with larger images) upscaled = resize_image(gray, scale=2.5) if show_steps: steps['2_upscaled'] = upscaled.copy() # Step 3: Remove noise denoised = remove_noise(upscaled, 'bilateral') if show_steps: steps['3_denoised'] = denoised.copy() # Step 4: Apply thresholding thresh = apply_thresholding(denoised, 'otsu') if show_steps: steps['4_threshold'] = thresh.copy() # Step 5: Add border bordered = add_border(thresh, 20) if show_steps: steps['5_bordered'] = bordered.copy() if show_steps: return bordered, steps return bordered