# RenAI/utils/preprocessing.py
# Uploaded by Arsh124 - commit "Initial RenAI app" (ebcc7d1)
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from deskew import determine_skew
from typing import Tuple, Union
import math
from loguru import logger
def _erase_lines(binary, canvas, kernel_size, thickness):
    """Paint over long straight strokes found in *binary* with white on *canvas*.

    Args:
        binary: Single-channel binary image that is searched for lines.
        canvas: Image modified in place; every detected line contour is drawn
            onto it in white.
        kernel_size: ``(width, height)`` of the rectangular structuring
            element, e.g. ``(1, 40)`` for vertical or ``(40, 1)`` for
            horizontal strokes.
        thickness: Stroke thickness used when drawing over each contour.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    # Opening with an elongated kernel keeps only strokes that are long in
    # the kernel's direction.
    lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
    found = cv2.findContours(lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Depending on the OpenCV version the result has 2 or 3 elements; the
    # contour list is first in the former case and second in the latter.
    contours = found[0] if len(found) == 2 else found[1]
    for contour in contours:
        cv2.drawContours(canvas, [contour], -1, (255, 255, 255), thickness)


def preprocessImage(image):
    """Denoise and binarise an image and strip long horizontal/vertical lines.

    Nothing is read from or written to disk; the function works purely on the
    array it is given.

    Args:
        image: Image array. A 3-channel array is treated as BGR; a
            single-channel array (``(H, W)`` or ``(H, W, 1)``) is promoted to
            BGR first so it goes through the same pipeline.

    Returns:
        Single-channel array: the thresholded content with detected lines
        removed, inverted once more before being returned.
    """
    # Single-channel input would make the BGR->gray conversions below fail,
    # so expand it to three identical channels up front.
    if image.ndim == 2 or (image.ndim == 3 and image.shape[2] == 1):
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Inverse Otsu threshold: pixels below the automatically chosen level
    # become 255, everything else 0.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Work on a copy so the caller's array is left untouched.
    removed = image.copy()
    _erase_lines(thresh, removed, (1, 40), 4)   # vertical lines
    _erase_lines(thresh, removed, (40, 1), 5)   # horizontal lines

    # Invert and dilate the line-free image, then intersect it with the
    # threshold mask so only pixels present in the original mask survive.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    removed = 255 - removed
    dilate = cv2.dilate(removed, repair_kernel, iterations=5)
    dilate = cv2.cvtColor(dilate, cv2.COLOR_BGR2GRAY)
    pre_result = cv2.bitwise_and(dilate, thresh)

    # Close small gaps left where lines crossed content, then clip to the
    # threshold mask again.
    result = cv2.morphologyEx(pre_result, cv2.MORPH_CLOSE, repair_kernel, iterations=5)
    final = cv2.bitwise_and(result, thresh)

    return 255 - final
def process_segment_and_crop_image(model, image, preprocess_image_path, padding=10, min_contour_area=100):
    """Segment *image* with *model* and crop the preprocessed image to the detected region.

    The model input is built from ``image``: grayscale, inverse binary
    threshold at 127, resized to 512x512 and shaped ``(1, 512, 512, 1)``.
    The predicted mask is resized to the dimensions of the image stored at
    ``preprocess_image_path``, binarised with Otsu's method, cleaned with a
    5x5 close/open, and the union of the bounding boxes of all sufficiently
    large contours (plus ``padding``) is cropped out of that stored image.

    Args:
        model: Object exposing ``predict(batch)``.
            NOTE(review): the prediction is assumed to squeeze to a 2-D mask
            with values in [0, 1] (it is scaled by 255 below) - confirm
            against the model actually used.
        image: Image array used to build the model input; converted with
            ``COLOR_BGR2GRAY``.
        preprocess_image_path: Path of the image that is read with
            ``cv2.imread`` and cropped.
        padding: Pixels added on every side of the bounding box, clamped to
            the image borders.
        min_contour_area: Contours whose area is not greater than this value
            are ignored.

    Returns:
        The cropped region of the image at ``preprocess_image_path``, or
        ``None`` when no contour exceeds ``min_contour_area``.

    Raises:
        OSError: If the image at ``preprocess_image_path`` cannot be read.
    """
    # Load the image to crop first: cv2.imread signals failure by returning
    # None, and failing here avoids running inference for nothing.
    original_img = cv2.imread(preprocess_image_path)
    if original_img is None:
        raise OSError(f"Could not read preprocessed image: {preprocess_image_path!r}")
    ori_height, ori_width = original_img.shape[:2]

    # Build the model input.
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    img = cv2.resize(img, (512, 512))
    img = np.expand_dims(img, axis=-1)      # add channel axis
    img_np = np.expand_dims(img, axis=0)    # add batch axis

    # Predict and drop the batch and channel axes again.
    pred = model.predict(img_np)
    pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)

    # Bring the mask to the size of the image being cropped and binarise it.
    resized_mask = cv2.resize(pred, (ori_width, ori_height))
    resized_mask = (resized_mask * 255).astype(np.uint8)
    _, binary_mask = cv2.threshold(resized_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Close then open with a 5x5 kernel to join nearby regions and drop specks.
    kernel = np.ones((5, 5), np.uint8)
    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
    cleaned_mask = cv2.morphologyEx(cleaned_mask, cv2.MORPH_OPEN, kernel)

    found = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Same version-agnostic unpacking as in preprocessImage: the result has
    # 2 or 3 elements depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]

    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_contour_area]
    if not valid_contours:
        print("No valid text regions found.")
        return None

    # Union of all bounding boxes, grown by the padding and clamped to the image.
    boxes = [cv2.boundingRect(cnt) for cnt in valid_contours]
    x_min = max(0, min(x for x, _, _, _ in boxes) - padding)
    y_min = max(0, min(y for _, y, _, _ in boxes) - padding)
    x_max = min(ori_width, max(x + w for x, _, w, _ in boxes) + padding)
    y_max = min(ori_height, max(y + h for _, y, _, h in boxes) + padding)

    return original_img[y_min:y_max, x_min:x_max]
def postProcessImage(cropped_image):
    """Apply the post-processing (sharpening) step to a cropped image.

    Operates on the array only; nothing is read from or written to disk.
    Deskewing is currently disabled, so the input orientation is kept.

    Args:
        cropped_image: Image array to post-process.

    Returns:
        The sharpened image array.
    """

    def rotate(
        image: np.ndarray, angle: float, background: Union[int, Tuple[int, int, int]]
    ) -> np.ndarray:
        """Rotate *image* by *angle* degrees, enlarging the canvas to fit.

        Currently unused; kept for the disabled deskew step below.
        """
        # shape[:2] is (rows, cols), i.e. (height, width).
        old_height, old_width = image.shape[:2]
        angle_radian = math.radians(angle)
        # Size of the axis-aligned box that contains the rotated image.
        new_width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
        new_height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        # Shift so the rotated content is centred in the enlarged canvas.
        rot_mat[0, 2] += (new_width - old_width) / 2
        rot_mat[1, 2] += (new_height - old_height) / 2
        return cv2.warpAffine(
            image,
            rot_mat,
            (int(round(new_width)), int(round(new_height))),
            borderValue=background,
        )

    # Deskew is disabled; the input is used as-is. To re-enable:
    #   grayscale = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    #   angle = determine_skew(grayscale)
    #   rotated = rotate(cropped_image, angle, (0, 0, 0))
    rotated = cropped_image

    # Unsharp mask: 1.5 * image - 0.5 * blurred.
    # NOTE(review): with a (1, 1) kernel the blur covers only the centre
    # pixel, so this step likely returns the image unchanged - confirm
    # whether a larger kernel was intended.
    blurred = cv2.GaussianBlur(rotated, (1, 1), sigmaX=3, sigmaY=3)
    sharpened = cv2.addWeighted(rotated, 1.5, blurred, -0.5, 0)

    # A dilation with a 1x1 kernel used to be computed here, but its result
    # was never returned; the dead computation has been removed.
    return sharpened