File size: 7,674 Bytes
ebcc7d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from deskew import determine_skew
from typing import Tuple, Union
import math
from loguru import logger
def preprocessImage(image):
    """
    Preprocess a BGR document image for OCR: denoise, binarize with Otsu's
    method, erase long horizontal/vertical ruling lines, then morphologically
    repair the remaining text strokes.

    Args:
        image (np.ndarray): Input BGR image (as produced by ``cv2.imread``).

    Returns:
        np.ndarray: Single-channel 8-bit image with a white background and
        dark text, with ruling lines removed.
    """
    # Grayscale + non-local-means denoise before thresholding.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Otsu binarization, inverted so ink is white (foreground) for morphology.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Work on a copy; line pixels are painted over in white on this copy.
    removed = image.copy()

    # Detect long vertical lines via a tall 1x40 opening and paint them white.
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
    remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    cnts = cv2.findContours(remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # cv2.findContours returns 2 values on OpenCV 4.x, 3 values on 3.x.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(removed, [c], -1, (255, 255, 255), 4)

    # Detect long horizontal lines via a wide 40x1 opening and paint them white.
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
    remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    cnts = cv2.findContours(remove_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(removed, [c], -1, (255, 255, 255), 5)

    # Repair strokes that the line removal cut through: invert so ink is
    # white, dilate to reconnect, then intersect with the original mask so
    # only genuine ink pixels survive.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    removed = 255 - removed
    dilate = cv2.dilate(removed, repair_kernel, iterations=5)
    dilate = cv2.cvtColor(dilate, cv2.COLOR_BGR2GRAY)
    pre_result = cv2.bitwise_and(dilate, thresh)

    # Close small gaps, intersect with the mask again, and re-invert so the
    # result is dark text on a white background.
    result = cv2.morphologyEx(pre_result, cv2.MORPH_CLOSE, repair_kernel, iterations=5)
    final = cv2.bitwise_and(result, thresh)
    invert_final = 255 - final
    return invert_final
def process_segment_and_crop_image(model, image, preprocess_image_path, padding=10, min_contour_area=100):
    """
    Segment text regions with a U-Net model and crop the preprocessed image
    to the bounding box that encloses all detected regions.

    Args:
        model: Trained U-Net segmentation model exposing ``.predict``
            (e.g. a tf.keras.Model) that takes a (1, 512, 512, 1) input.
        image (np.ndarray): Original BGR image fed to the model.
        preprocess_image_path (str): Path of the preprocessed image to crop.
        padding (int): Margin, in pixels, added around the detected region.
        min_contour_area (int): Contours smaller than this are treated as
            noise and ignored.

    Returns:
        np.ndarray | None: The cropped image, or ``None`` when no valid text
        region is found or the preprocessed image cannot be read.
    """
    # Binarize (inverted) and resize to the model's fixed 512x512 input.
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    img = cv2.resize(img, (512, 512))
    # Add channel and batch axes: (512, 512) -> (1, 512, 512, 1).
    img = np.expand_dims(img, axis=-1)
    img_np = np.expand_dims(img, axis=0)

    # Predict the segmentation mask and drop the batch/channel axes.
    pred = model.predict(img_np)
    pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)

    # Load the image that will actually be cropped.
    original_img = cv2.imread(preprocess_image_path)
    if original_img is None:
        # cv2.imread returns None instead of raising on a bad/missing path.
        logger.error(f"Could not read preprocessed image: {preprocess_image_path}")
        return None
    ori_height, ori_width = original_img.shape[:2]

    # Scale the mask back to the original resolution and binarize it.
    resized_mask = cv2.resize(pred, (ori_width, ori_height))
    resized_mask = (resized_mask * 255).astype(np.uint8)
    _, binary_mask = cv2.threshold(resized_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Close then open to drop speckle noise and connect nearby text blobs.
    kernel = np.ones((5, 5), np.uint8)
    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
    cleaned_mask = cv2.morphologyEx(cleaned_mask, cv2.MORPH_OPEN, kernel)

    # Keep only contours large enough to be text regions.
    contours, _ = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_contour_area]
    if not valid_contours:
        logger.warning("No valid text regions found.")
        return None

    # Union bounding box over all valid contours.
    x_min, y_min = ori_width, ori_height
    x_max, y_max = 0, 0
    for contour in valid_contours:
        x, y, w, h = cv2.boundingRect(contour)
        x_min = min(x_min, x)
        y_min = min(y_min, y)
        x_max = max(x_max, x + w)
        y_max = max(y_max, y + h)

    # Expand by `padding`, clamped to the image bounds.
    x_min = max(0, x_min - padding)
    y_min = max(0, y_min - padding)
    x_max = min(ori_width, x_max + padding)
    y_max = min(ori_height, y_max + padding)

    cropped_img = original_img[y_min:y_max, x_min:x_max]
    return cropped_img
def postProcessImage(cropped_image):
    """
    Post-process a cropped image: (optionally) deskew, then lightly sharpen.

    Deskewing is currently disabled — the input is passed straight to the
    sharpening stage. The inner ``rotate`` helper is kept so deskewing can be
    re-enabled.

    Args:
        cropped_image (np.ndarray): Cropped image to post-process.

    Returns:
        np.ndarray: The sharpened image.
    """
    def rotate(
        image: np.ndarray, angle: float, background: Union[int, Tuple[int, int, int]]
    ) -> np.ndarray:
        # Rotate `image` by `angle` degrees, enlarging the canvas so nothing
        # is clipped; `background` fills the exposed border.
        # NOTE(review): shape[:2] is (height, width), so these names are
        # swapped; the output-size computation below compensates, matching
        # the `deskew` library's reference example — confirm before editing.
        old_width, old_height = image.shape[:2]
        angle_radian = math.radians(angle)
        width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
        height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        rot_mat[1, 2] += (width - old_width) / 2
        rot_mat[0, 2] += (height - old_height) / 2
        return cv2.warpAffine(image, rot_mat, (int(round(height)), int(round(width))), borderValue=background)

    # Deskew disabled; to re-enable:
    #   angle = determine_skew(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY))
    #   rotated = rotate(cropped_image, angle, (0, 0, 0))
    rotated = cropped_image

    # Unsharp mask with reduced intensity.
    # NOTE(review): with ksize (1, 1) the Gaussian blur is an identity, so
    # 1.5*x - 0.5*x == x and this step is effectively a no-op — confirm
    # whether a larger kernel (e.g. (3, 3)) was intended.
    blurred = cv2.GaussianBlur(rotated, (1, 1), sigmaX=3, sigmaY=3)
    sharpened = cv2.addWeighted(rotated, 1.5, blurred, -0.5, 0)
    return sharpened