import math
import os
from typing import Tuple, Union

import cv2
import matplotlib.pyplot as plt
import numpy as np
from deskew import determine_skew
from loguru import logger


def _find_external_contours(binary_image: np.ndarray):
    """Return the external contours of *binary_image*.

    ``cv2.findContours`` returns ``(contours, hierarchy)`` on some OpenCV
    versions and ``(image, contours, hierarchy)`` on others; picking the
    element by tuple length keeps every caller version-agnostic.
    """
    found = cv2.findContours(
        binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    return found[0] if len(found) == 2 else found[1]


def _erase_lines(
    thresh: np.ndarray,
    canvas: np.ndarray,
    kernel_size: Tuple[int, int],
    thickness: int,
) -> None:
    """Paint over straight lines found in *thresh* with white on *canvas*.

    A morphological opening with a long, one-pixel-wide rectangular kernel
    keeps only strokes at least as long as the kernel; the contours of what
    survives are then drawn in white onto *canvas* (modified in place).
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
    for contour in _find_external_contours(lines):
        cv2.drawContours(canvas, [contour], -1, (255, 255, 255), thickness)


def preprocessImage(image: np.ndarray) -> np.ndarray:
    """Denoise and binarize an image and remove long straight lines.

    Steps: grayscale conversion, non-local-means denoising, inverted Otsu
    thresholding, removal of vertical and horizontal lines, and a
    dilate/close "repair" pass that is masked by the thresholded image.

    Args:
        image: Input image. It is converted with ``cv2.COLOR_BGR2GRAY``, so
            a 3-channel BGR array is expected. The input is not modified.

    Returns:
        The processed single-channel image, inverted back after the
        inverted threshold used for the intermediate steps.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Inverted Otsu threshold; index [1] is the thresholded image.
    thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    )[1]

    # Work on a copy so the caller's image is left untouched.
    removed = image.copy()
    _erase_lines(thresh, removed, (1, 40), 4)  # vertical lines
    _erase_lines(thresh, removed, (40, 1), 5)  # horizontal lines

    # Repair pass: invert, grow the remaining strokes, then keep only the
    # pixels that were also set in the thresholded image.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    removed = 255 - removed
    dilated = cv2.dilate(removed, repair_kernel, iterations=5)
    dilated = cv2.cvtColor(dilated, cv2.COLOR_BGR2GRAY)
    pre_result = cv2.bitwise_and(dilated, thresh)

    # Final result: close small gaps and mask with the threshold once more.
    result = cv2.morphologyEx(
        pre_result, cv2.MORPH_CLOSE, repair_kernel, iterations=5
    )
    final = cv2.bitwise_and(result, thresh)

    # Undo the inversion introduced by THRESH_BINARY_INV.
    return 255 - final


def process_segment_and_crop_image(
    model,
    image: np.ndarray,
    preprocess_image_path: str,
    padding: int = 10,
    min_contour_area: int = 100,
) -> Union[np.ndarray, None]:
    """Segment *image* with *model* and crop the preprocessed image to it.

    The predicted mask is resized to the preprocessed image, binarized and
    cleaned, and the preprocessed image is cropped to the padded bounding
    box that encloses every sufficiently large contour of the mask.

    Args:
        model: Object exposing ``predict``; it is called with a
            ``(1, 512, 512, 1)`` array and its output is squeezed on the
            first and last axes. The original docstring describes it as a
            trained U-Net (``tf.keras.Model``).
        image: Image fed to the model. It is converted with
            ``cv2.COLOR_BGR2GRAY``, so a 3-channel BGR array is expected.
        preprocess_image_path: Path of the preprocessed image that is read
            from disk and cropped.
        padding: Pixels added around the detected region, clamped to the
            image bounds.
        min_contour_area: Contours whose area is not greater than this are
            ignored as noise.

    Returns:
        The cropped region of the preprocessed image, or ``None`` when no
        contour exceeds ``min_contour_area``.

    Raises:
        FileNotFoundError: If ``preprocess_image_path`` cannot be read.
    """
    # Load the image to crop first so an unreadable path fails fast, before
    # the model is run. cv2.imread signals failure by returning None.
    original_img = cv2.imread(preprocess_image_path)
    if original_img is None:
        raise FileNotFoundError(
            f"Could not read preprocessed image (missing or unreadable): "
            f"{preprocess_image_path}"
        )
    ori_height, ori_width = original_img.shape[:2]

    # Build the model input: inverted binary image, 512x512, with channel
    # and batch axes added.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
    binary = cv2.resize(binary, (512, 512))
    model_input = np.expand_dims(np.expand_dims(binary, axis=-1), axis=0)

    # Predict the mask and drop the batch and channel axes again.
    pred = model.predict(model_input)
    pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)

    # Bring the mask to the size of the image that will be cropped.
    resized_mask = cv2.resize(pred, (ori_width, ori_height))
    # Clip before the uint8 cast so values outside [0, 1] cannot wrap
    # around; for values already in [0, 1] this is a no-op.
    resized_mask = np.clip(resized_mask * 255, 0, 255).astype(np.uint8)

    _, binary_mask = cv2.threshold(
        resized_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # Close to connect nearby regions, then open to drop small specks.
    kernel = np.ones((5, 5), np.uint8)
    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
    cleaned_mask = cv2.morphologyEx(cleaned_mask, cv2.MORPH_OPEN, kernel)

    valid_contours = [
        contour
        for contour in _find_external_contours(cleaned_mask)
        if cv2.contourArea(contour) > min_contour_area
    ]
    if not valid_contours:
        logger.warning("No valid text regions found.")
        return None

    # Union of all bounding rectangles, each given as (x, y, w, h).
    boxes = [cv2.boundingRect(contour) for contour in valid_contours]
    x_min = min(x for x, _, _, _ in boxes)
    y_min = min(y for _, y, _, _ in boxes)
    x_max = max(x + w for x, _, w, _ in boxes)
    y_max = max(y + h for _, y, _, h in boxes)

    # Pad the box and clamp it to the image bounds.
    x_min = max(0, x_min - padding)
    y_min = max(0, y_min - padding)
    x_max = min(ori_width, x_max + padding)
    y_max = min(ori_height, y_max + padding)

    return original_img[y_min:y_max, x_min:x_max]


def _rotate(
    image: np.ndarray,
    angle: float,
    background: Union[int, Tuple[int, int, int]],
) -> np.ndarray:
    """Rotate *image* by *angle* degrees, enlarging the canvas to fit.

    Currently unused: kept so the deskew step in ``postProcessImage`` can be
    re-enabled. Uncovered areas are filled with *background*.
    """
    # ndarray.shape is (rows, cols), i.e. (height, width).
    old_height, old_width = image.shape[:2]
    angle_radian = math.radians(angle)
    sin_a = abs(np.sin(angle_radian))
    cos_a = abs(np.cos(angle_radian))
    new_width = sin_a * old_height + cos_a * old_width
    new_height = sin_a * old_width + cos_a * old_height

    image_center = tuple(np.array(image.shape[1::-1]) / 2)
    rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
    # Shift the result so the rotated image is centred on the new canvas.
    rot_mat[0, 2] += (new_width - old_width) / 2
    rot_mat[1, 2] += (new_height - old_height) / 2
    return cv2.warpAffine(
        image,
        rot_mat,
        (int(round(new_width)), int(round(new_height))),
        borderValue=background,
    )


def postProcessImage(cropped_image: np.ndarray) -> np.ndarray:
    """Apply a mild unsharp-mask sharpening to a cropped image.

    Args:
        cropped_image: Image to post-process; it is not modified.

    Returns:
        The sharpened image.
    """
    # NOTE: deskewing is currently disabled. To re-enable it, compute the
    # angle with determine_skew() on a grayscale copy and pass the image
    # through _rotate() before sharpening.
    rotated = cropped_image

    # Unsharp mask: 1.5 * image - 0.5 * blurred.
    # NOTE(review): with a (1, 1) kernel the Gaussian blur should leave the
    # image essentially unchanged, which would make this step close to an
    # identity; confirm the intended kernel size.
    blurred = cv2.GaussianBlur(rotated, (1, 1), sigmaX=3, sigmaY=3)
    sharpened = cv2.addWeighted(rotated, 1.5, blurred, -0.5, 0)

    return sharpened