import os

import cv2
import numpy as np
|
|
def preprocess_image(image_path_or_array, target_size=(1024, 32)):
    """Prepare an image for handwritten text recognition.

    Steps, in order:

    1. Load the image as single-channel grayscale (from disk or from an
       in-memory array).
    2. Enhance local contrast with CLAHE (clip limit 2.0, 8x8 tile grid).
    3. Scale the image to fit inside ``target_size`` while keeping its
       aspect ratio.
    4. Paste it onto a white canvas: flush left, vertically centred.

    Args:
        image_path_or_array: Path to an image file (``str`` or
            ``os.PathLike``), or an image array shaped ``(H, W)``,
            ``(H, W, 1)`` or ``(H, W, C)``. Arrays with several channels are
            converted with ``cv2.COLOR_BGR2GRAY``, i.e. treated as BGR.
            NOTE(review): the canvas is ``uint8``, so array inputs are
            presumably expected to be 8-bit -- confirm against callers.
        target_size: ``(width, height)`` of the returned canvas, in pixels.

    Returns:
        A ``uint8`` array of shape ``(height, width)``. An all-white canvas
        is returned when the input contains no pixels or when the scaled
        image would collapse to zero width or height.

    Raises:
        FileNotFoundError: If a path is given and OpenCV cannot read it.
        ValueError: If an array input is neither 2-D nor 3-D.
    """
    target_w, target_h = target_size
    canvas = np.full((target_h, target_w), 255, dtype=np.uint8)

    if isinstance(image_path_or_array, (str, os.PathLike)):
        img = cv2.imread(os.fsdecode(image_path_or_array), cv2.IMREAD_GRAYSCALE)
        if img is None:
            raise FileNotFoundError(f"Could not read image at {image_path_or_array}")
    else:
        image = image_path_or_array
        if image.ndim not in (2, 3):
            raise ValueError(
                f"Expected a 2-D or 3-D image array, got {image.ndim} dimension(s)"
            )
        if image.size == 0:
            # Nothing to draw; treat it like the degenerate-resize case below
            # instead of failing inside OpenCV or dividing by zero.
            return canvas
        if image.ndim == 2:
            img = image.copy()
        elif image.shape[2] == 1:
            # Already grayscale with an explicit channel axis; cvtColor would
            # reject a single-channel input, so just drop the axis.
            img = image[:, :, 0].copy()
        else:
            img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Contrast Limited Adaptive Histogram Equalization evens out lighting
    # before the image is resampled.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    img = clahe.apply(img)

    h, w = img.shape

    # Fit inside the target box. Exact integer arithmetic is used because
    # int(w * (target_w / w)) can land one pixel short through float round-off
    # (e.g. 49 * (32 / 49) evaluates to 31.999...).
    if target_w * h <= target_h * w:
        # Width is the limiting dimension.
        new_w = target_w
        new_h = (h * target_w) // w
    else:
        # Height is the limiting dimension.
        new_w = (w * target_h) // h
        new_h = target_h

    if new_w == 0 or new_h == 0:
        # Extreme aspect ratio: the scaled image has no pixels left.
        return canvas

    # NOTE(review): INTER_AREA is also used when enlarging; OpenCV recommends
    # it for shrinking -- switching the upscale path would change model inputs,
    # so it is left as is.
    img_resized = cv2.resize(
        img, (int(new_w), int(new_h)), interpolation=cv2.INTER_AREA
    )

    # Left-aligned, vertically centred on the white background.
    pad_y = (target_h - new_h) // 2
    pad_x = 0
    canvas[pad_y:pad_y + new_h, pad_x:pad_x + new_w] = img_resized

    return canvas
|
|
def deskew(img):
    """Remove horizontal slant from an image using its central moments.

    The slant is estimated as ``mu11 / mu02`` and undone with an affine shear
    along the x axis, pivoting on the middle row so the content stays
    horizontally in place.

    Args:
        img: Image array accepted by ``cv2.moments``.
            NOTE(review): moments are intensity-weighted, so this presumably
            expects a single-channel image whose foreground is brighter than
            its background -- confirm against callers.

    Returns:
        A new array with the same width and height as ``img``. An unmodified
        copy is returned when the image is empty or ``mu02`` is too close to
        zero for the slant estimate to be meaningful.
    """
    if img.size == 0:
        return img.copy()

    moments = cv2.moments(img)
    if abs(moments['mu02']) < 1e-2:
        return img.copy()

    height, width = img.shape[:2]
    skew = moments['mu11'] / moments['mu02']

    # This matrix maps destination pixels back to source pixels
    # (src_x = x + skew * (y - height / 2)), so warpAffine must be told not to
    # invert it; without WARP_INVERSE_MAP the shear is applied in the wrong
    # direction and the slant gets worse instead of being removed.
    shear = np.float32([[1, skew, -0.5 * height * skew], [0, 1, 0]])
    return cv2.warpAffine(
        img,
        shear,
        (width, height),
        flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
|
if __name__ == "__main__":
    # Running this file directly only prints a readiness message.
    print("Preprocessing module ready.")
|
|