File size: 7,674 Bytes
ebcc7d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from deskew import determine_skew
from typing import Tuple, Union
import math
from loguru import logger

def preprocessImage(image):
    """
    Cleans up a document image by stripping long ruling lines and returning a binarized result.

    Pipeline: grayscale conversion -> non-local-means denoising -> inverse Otsu threshold ->
    detection of long vertical and horizontal strokes via morphological opening -> painting those
    strokes white on a copy of the input -> masking/repairing against the thresholded image ->
    final inversion.

    Args:
    - image (np.ndarray): Input image in BGR channel order (it is converted with COLOR_BGR2GRAY).
      The array itself is not modified; line removal is drawn on a copy.

    Returns:
    - np.ndarray: Single-channel image in which the pixels kept from the inverse-Otsu foreground
      are 0 and everything else is 255.
    """
    # Grayscale + denoise (h=10, template window 7, search window 21)
    denoised = cv2.fastNlMeansDenoising(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), None, 10, 7, 21
    )

    # Inverse Otsu binarization: foreground becomes 255, background 0
    _, ink_mask = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Work on a copy so the caller's image stays untouched
    canvas = image.copy()
    white = (255, 255, 255)

    # (kernel size as (width, height), stroke thickness used to paint over the line)
    # First pass targets vertical lines, second pass horizontal lines.
    line_passes = (
        ((1, 40), 4),
        ((40, 1), 5),
    )
    for kernel_size, stroke in line_passes:
        line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        line_mask = cv2.morphologyEx(ink_mask, cv2.MORPH_OPEN, line_kernel, iterations=2)
        found = cv2.findContours(line_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # findContours returns 2 or 3 values depending on the OpenCV version
        line_contours = found[0] if len(found) == 2 else found[1]
        for outline in line_contours:
            cv2.drawContours(canvas, [outline], -1, white, stroke)

    # Repair step: invert, thicken, and keep only what overlaps the thresholded foreground
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    thickened = cv2.dilate(255 - canvas, repair_kernel, iterations=5)
    thickened_gray = cv2.cvtColor(thickened, cv2.COLOR_BGR2GRAY)
    overlap = cv2.bitwise_and(thickened_gray, ink_mask)

    # Close small gaps, then restrict to the thresholded foreground once more
    closed = cv2.morphologyEx(overlap, cv2.MORPH_CLOSE, repair_kernel, iterations=5)
    foreground = cv2.bitwise_and(closed, ink_mask)

    # Flip polarity for the final output
    return 255 - foreground

def process_segment_and_crop_image(
    model,
    image: np.ndarray,
    preprocess_image_path: str,
    padding: int = 10,
    min_contour_area: int = 100,
) -> Union[np.ndarray, None]:
    """
    Predicts a segmentation mask for `image` and crops the preprocessed image to the detected regions.

    The mask is predicted from `image`, while the crop is taken from the image stored at
    `preprocess_image_path`. The crop is the bounding rectangle enclosing every sufficiently
    large contour of the mask, grown by `padding` pixels and clamped to the image bounds.

    Args:
    - model: Object exposing `predict(batch)`. It is called with an array of shape
      (1, 512, 512, 1) and its output must have shape (1, H, W, 1).
    - image (np.ndarray): BGR image used as the segmentation input.
    - preprocess_image_path (str): Path of the image that is read from disk and cropped.
    - padding (int): Pixels added on every side of the detected region.
    - min_contour_area (int): Contours with an area not greater than this are ignored.

    Returns:
    - np.ndarray | None: The cropped region of the image at `preprocess_image_path`, or None
      when no contour exceeds `min_contour_area`.

    Raises:
    - OSError: If the image at `preprocess_image_path` cannot be read.
    """
    # Build the model input: grayscale -> fixed inverse binary threshold -> 512x512
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    img = cv2.resize(img, (512, 512))

    # Add channel and batch axes -> (1, 512, 512, 1)
    # NOTE(review): the model receives raw 0/255 values with no scaling — confirm this
    # matches how the model was trained.
    img_np = np.expand_dims(np.expand_dims(img, axis=-1), axis=0)

    # Predict and drop the batch/channel axes again
    pred = model.predict(img_np)
    pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)

    # Load the image that will actually be cropped. cv2.imread signals failure by
    # returning None instead of raising, so check explicitly.
    original_img = cv2.imread(preprocess_image_path)
    if original_img is None:
        raise OSError(f"Could not read image: {preprocess_image_path!r}")

    ori_height, ori_width = original_img.shape[:2]

    # Scale the mask up to the crop target's size and convert it to 8-bit.
    # Assumes the prediction lies in [0, 1] — TODO confirm against the model's output layer.
    resized_mask = cv2.resize(pred, (ori_width, ori_height))
    resized_mask = (resized_mask * 255).astype(np.uint8)

    # Otsu threshold to get a binary mask
    _, binary_mask = cv2.threshold(resized_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Close then open with a 5x5 kernel: join nearby regions, then drop small specks
    kernel = np.ones((5, 5), np.uint8)
    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
    cleaned_mask = cv2.morphologyEx(cleaned_mask, cv2.MORPH_OPEN, kernel)

    # findContours returns 2 or 3 values depending on the OpenCV version; handle both,
    # the same way preprocessImage does.
    found = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]

    # Filter contours based on area to remove small noise
    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_contour_area]

    if not valid_contours:
        print("No valid text regions found.")
        return None

    # Union of all bounding rectangles, as (x, y, w, h) tuples
    boxes = [cv2.boundingRect(cnt) for cnt in valid_contours]
    x_min = min(x for x, _, _, _ in boxes)
    y_min = min(y for _, y, _, _ in boxes)
    x_max = max(x + w for x, _, w, _ in boxes)
    y_max = max(y + h for _, y, _, h in boxes)

    # Grow by the padding, clamped to the image bounds
    x_min = max(0, x_min - padding)
    y_min = max(0, y_min - padding)
    x_max = min(ori_width, x_max + padding)
    y_max = min(ori_height, y_max + padding)

    return original_img[y_min:y_max, x_min:x_max]


def postProcessImage(cropped_image: np.ndarray) -> np.ndarray:
    """
    Post-processes a cropped image with an unsharp-mask style sharpening step.

    Deskewing is currently disabled: the input is used without rotation. The nested
    `rotate` helper is kept so deskewing can be switched back on.

    Args:
    - cropped_image (np.ndarray): Image to post-process.

    Returns:
    - np.ndarray: The sharpened image.
    """
    def rotate(
        image: np.ndarray, angle: float, background: Union[int, Tuple[int, int, int]]
    ) -> np.ndarray:
        """Rotate `image` by `angle` degrees on a canvas enlarged to fit, filled with `background`."""
        # shape[:2] is (rows, cols), i.e. (height, width)
        old_height, old_width = image.shape[:2]
        angle_radian = math.radians(angle)
        sin_a = abs(np.sin(angle_radian))
        cos_a = abs(np.cos(angle_radian))

        # Size of the axis-aligned box that contains the rotated image
        new_width = sin_a * old_height + cos_a * old_width
        new_height = sin_a * old_width + cos_a * old_height

        # Rotate about the image centre, then shift so the result is centred on the new canvas
        image_center = (old_width / 2, old_height / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        rot_mat[0, 2] += (new_width - old_width) / 2
        rot_mat[1, 2] += (new_height - old_height) / 2
        return cv2.warpAffine(
            image,
            rot_mat,
            (int(round(new_width)), int(round(new_height))),
            borderValue=background,
        )

    # Deskewing is disabled. To re-enable it, compute the angle with determine_skew() on a
    # grayscale version of cropped_image and use rotate(cropped_image, angle, (0, 0, 0)) here.
    rotated = cropped_image

    # Sharpening (reduced intensity): 1.5 * image - 0.5 * blurred
    # NOTE(review): a 1x1 Gaussian kernel has a single coefficient, so `blurred` should equal
    # the input and this step leaves the image effectively unchanged. If real sharpening is
    # wanted, ksize=(0, 0) lets OpenCV derive the kernel from sigma — confirm intent first.
    blurred = cv2.GaussianBlur(rotated, (1, 1), sigmaX=3, sigmaY=3)
    sharpened = cv2.addWeighted(rotated, 1.5, blurred, -0.5, 0)

    return sharpened