Spaces:

Arsh124
/

RenAI

Sleeping

App Files Files Community

RenAI / utils /preprocessing.py

Arsh124

Initial RenAI app

ebcc7d1 3 months ago

raw

history blame contribute delete

7.67 kB

	import cv2
	import numpy as np
	import matplotlib.pyplot as plt
	import os
	from deskew import determine_skew
	from typing import Tuple, Union
	import math
	from loguru import logger

	def _erase_lines(canvas, binary, kernel_size, thickness):
	    """
	    Paints over long straight lines on ``canvas`` in white, modifying it in place.

	    Args:
	    - canvas (np.ndarray): Image that is drawn on.
	    - binary (np.ndarray): Binary mask in which the lines are searched for.
	    - kernel_size (tuple): Rectangular structuring element size, e.g. (1, 40) for
	      vertical lines or (40, 1) for horizontal lines.
	    - thickness (int): Stroke thickness used when painting over each detected line.
	    """
	    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
	    # Opening with a long, thin kernel keeps only structures at least that long.
	    lines_only = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
	    found = cv2.findContours(lines_only, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    # findContours returns either a 2-tuple or a 3-tuple depending on the OpenCV
	    # version; the contour list is the first element of the former and the second
	    # element of the latter.
	    contours = found[0] if len(found) == 2 else found[1]
	    for contour in contours:
	        cv2.drawContours(canvas, [contour], -1, (255, 255, 255), thickness)


	def preprocessImage(image):
	    """
	    Preprocesses an image by denoising, Otsu thresholding, and removing long horizontal
	    and vertical lines. Nothing is written to disk; the processed image is returned.

	    Args:
	    - image (np.ndarray): Input image. A 3-channel image is converted with
	      cv2.COLOR_BGR2GRAY (so BGR channel order is assumed - confirm against the
	      caller). Single-channel input is also accepted and is promoted to 3 channels.
	      The input array itself is not modified.

	    Returns:
	    - np.ndarray: Single-channel processed image. Pixels outside the Otsu foreground
	      mask are 255; the remaining content is darker.
	    """
	    # The pipeline below expects a 3-channel image (it converts with BGR2GRAY twice),
	    # so promote grayscale input instead of failing inside cvtColor.
	    shape = np.shape(image)
	    if len(shape) == 2 or (len(shape) == 3 and shape[2] == 1):
	        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

	    # Grayscale -> denoise -> inverted Otsu threshold (dark content becomes 255).
	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)
	    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

	    # Work on a copy so the caller's image is preserved.
	    removed = image.copy()
	    _erase_lines(removed, thresh, (1, 40), 4)  # vertical lines
	    _erase_lines(removed, thresh, (40, 1), 5)  # horizontal lines

	    # Repair strokes that were damaged where the erased lines crossed them.
	    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
	    removed = 255 - removed
	    dilate = cv2.dilate(removed, repair_kernel, iterations=5)
	    dilate = cv2.cvtColor(dilate, cv2.COLOR_BGR2GRAY)
	    pre_result = cv2.bitwise_and(dilate, thresh)

	    # Close small gaps, then restrict the result to the thresholded foreground.
	    result = cv2.morphologyEx(pre_result, cv2.MORPH_CLOSE, repair_kernel, iterations=5)
	    final = cv2.bitwise_and(result, thresh)

	    # Invert back so the background is white.
	    return 255 - final

	def process_segment_and_crop_image(model, image, preprocess_image_path, padding=10, min_contour_area=100):
	    """
	    Runs a segmentation model on an image and crops the preprocessed image (read from
	    disk) to the padded bounding box that encloses all sufficiently large mask regions.

	    Args:
	    - model: Object exposing ``predict``. It is called with an array of shape
	      (1, 512, 512, 1) and its output must be squeezable to 2-D on the first and last
	      axes (presumably a trained U-Net - confirm against the caller).
	    - image (np.ndarray): Image used to build the model input; converted with
	      cv2.COLOR_BGR2GRAY.
	    - preprocess_image_path (str): Path of the image that is read with cv2.imread and
	      cropped.
	    - padding (int): Padding in pixels added around the detected region, clamped to
	      the image borders.
	    - min_contour_area (int): Contours whose area is not larger than this are ignored.

	    Returns:
	    - np.ndarray or None: The cropped region of the image at ``preprocess_image_path``,
	      or None when no contour exceeds ``min_contour_area``.

	    Raises:
	    - OSError: If the image at ``preprocess_image_path`` cannot be read.
	    """
	    # Load the image to crop first so an unreadable path fails before the
	    # (comparatively expensive) model prediction. cv2.imread signals failure by
	    # returning None rather than raising.
	    original_img = cv2.imread(preprocess_image_path)
	    if original_img is None:
	        raise OSError(f"Could not read image: {preprocess_image_path!r}")
	    ori_height, ori_width = original_img.shape[:2]

	    # Build the model input: grayscale -> inverted binary -> 512x512 -> (1, 512, 512, 1).
	    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
	    img = cv2.resize(img, (512, 512))
	    img_np = np.expand_dims(np.expand_dims(img, axis=-1), axis=0)

	    # Predict the segmentation mask and drop the batch and channel axes.
	    pred = model.predict(img_np)
	    pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)

	    # Bring the mask to the size of the image being cropped and scale it to 8 bit.
	    # NOTE(review): the * 255 scaling assumes mask values in [0, 1] - confirm.
	    resized_mask = cv2.resize(pred, (ori_width, ori_height))
	    resized_mask = (resized_mask * 255).astype(np.uint8)

	    # Binarise with Otsu, then close and open to connect nearby text and drop specks.
	    _, binary_mask = cv2.threshold(resized_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
	    kernel = np.ones((5, 5), np.uint8)
	    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
	    cleaned_mask = cv2.morphologyEx(cleaned_mask, cv2.MORPH_OPEN, kernel)

	    # Keep only contours that are large enough to be real content.
	    contours, _ = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_contour_area]

	    if not valid_contours:
	        logger.warning("No valid text regions found.")
	        return None

	    # Union of all bounding rectangles, padded and clamped to the image.
	    boxes = [cv2.boundingRect(cnt) for cnt in valid_contours]
	    x_min = max(0, min(x for x, _, _, _ in boxes) - padding)
	    y_min = max(0, min(y for _, y, _, _ in boxes) - padding)
	    x_max = min(ori_width, max(x + w for x, _, w, _ in boxes) + padding)
	    y_max = min(ori_height, max(y + h for _, y, _, h in boxes) + padding)

	    return original_img[y_min:y_max, x_min:x_max]


	def postProcessImage(cropped_image):
	    """
	    Post-processes a cropped image with an unsharp-mask style sharpening step and
	    returns the result. Nothing is written to disk.

	    Deskewing is currently disabled, and no dilation is applied to the returned image.

	    Args:
	    - cropped_image (np.ndarray): Image array to post-process. It is not modified.

	    Returns:
	    - np.ndarray: The sharpened image (output of cv2.addWeighted), same size as the input.
	    """
	    # TODO: deskewing is disabled. Re-enabling it means estimating the angle with
	    # deskew.determine_skew on a grayscale copy and rotating cropped_image by it.

	    # Unsharp mask: 1.5 * image - 0.5 * blurred.
	    # NOTE(review): with a (1, 1) kernel the Gaussian blur appears to leave the image
	    # unchanged, which would make this step a no-op regardless of sigma. Confirm
	    # whether a larger kernel was intended before relying on any sharpening here.
	    blurred = cv2.GaussianBlur(cropped_image, (1, 1), sigmaX=3, sigmaY=3)
	    sharpened = cv2.addWeighted(cropped_image, 1.5, blurred, -0.5, 0)

	    # A 1x1 dilation used to be computed here but its result was never returned,
	    # so it has been dropped.
	    return sharpened