File size: 7,674 Bytes
ebcc7d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 |
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from deskew import determine_skew
from typing import Tuple, Union
import math
from loguru import logger
def preprocessImage(image):
    """
    Preprocess a BGR document image for OCR: denoise, binarize with Otsu's
    method, erase long horizontal/vertical ruling lines, then morphologically
    repair the remaining text strokes.

    Args:
        image (np.ndarray): Input BGR image (as produced by ``cv2.imread``).

    Returns:
        np.ndarray: Single-channel 8-bit image with a white background and
        dark text, with ruling lines removed.
    """
    # Grayscale + non-local-means denoise before thresholding.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)

    # Otsu binarization, inverted so ink is white (foreground) for morphology.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Work on a copy; line pixels are painted over in white on this copy.
    removed = image.copy()

    # Detect long vertical lines via a tall 1x40 opening and paint them white.
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 40))
    remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    cnts = cv2.findContours(remove_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # cv2.findContours returns 2 values on OpenCV 4.x, 3 values on 3.x.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(removed, [c], -1, (255, 255, 255), 4)

    # Detect long horizontal lines via a wide 40x1 opening and paint them white.
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40, 1))
    remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    cnts = cv2.findContours(remove_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        cv2.drawContours(removed, [c], -1, (255, 255, 255), 5)

    # Repair strokes that the line removal cut through: invert so ink is
    # white, dilate to reconnect, then intersect with the original mask so
    # only genuine ink pixels survive.
    repair_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    removed = 255 - removed
    dilate = cv2.dilate(removed, repair_kernel, iterations=5)
    dilate = cv2.cvtColor(dilate, cv2.COLOR_BGR2GRAY)
    pre_result = cv2.bitwise_and(dilate, thresh)

    # Close small gaps, intersect with the mask again, and re-invert so the
    # result is dark text on a white background.
    result = cv2.morphologyEx(pre_result, cv2.MORPH_CLOSE, repair_kernel, iterations=5)
    final = cv2.bitwise_and(result, thresh)
    invert_final = 255 - final
    return invert_final
def process_segment_and_crop_image(model, image, preprocess_image_path, padding=10, min_contour_area=100):
    """
    Segment text regions with a U-Net model and crop the preprocessed image
    to the bounding box that encloses all detected regions.

    Args:
        model: Trained U-Net segmentation model exposing ``.predict``
            (e.g. a tf.keras.Model) that takes a (1, 512, 512, 1) input.
        image (np.ndarray): Original BGR image fed to the model.
        preprocess_image_path (str): Path of the preprocessed image to crop.
        padding (int): Margin, in pixels, added around the detected region.
        min_contour_area (int): Contours smaller than this are treated as
            noise and ignored.

    Returns:
        np.ndarray | None: The cropped image, or ``None`` when no valid text
        region is found or the preprocessed image cannot be read.
    """
    # Binarize (inverted) and resize to the model's fixed 512x512 input.
    img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY_INV)
    img = cv2.resize(img, (512, 512))
    # Add channel and batch axes: (512, 512) -> (1, 512, 512, 1).
    img = np.expand_dims(img, axis=-1)
    img_np = np.expand_dims(img, axis=0)

    # Predict the segmentation mask and drop the batch/channel axes.
    pred = model.predict(img_np)
    pred = np.squeeze(np.squeeze(pred, axis=0), axis=-1)

    # Load the image that will actually be cropped.
    original_img = cv2.imread(preprocess_image_path)
    if original_img is None:
        # cv2.imread returns None instead of raising on a bad/missing path.
        logger.error(f"Could not read preprocessed image: {preprocess_image_path}")
        return None
    ori_height, ori_width = original_img.shape[:2]

    # Scale the mask back to the original resolution and binarize it.
    resized_mask = cv2.resize(pred, (ori_width, ori_height))
    resized_mask = (resized_mask * 255).astype(np.uint8)
    _, binary_mask = cv2.threshold(resized_mask, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Close then open to drop speckle noise and connect nearby text blobs.
    kernel = np.ones((5, 5), np.uint8)
    cleaned_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
    cleaned_mask = cv2.morphologyEx(cleaned_mask, cv2.MORPH_OPEN, kernel)

    # Keep only contours large enough to be text regions.
    contours, _ = cv2.findContours(cleaned_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    valid_contours = [cnt for cnt in contours if cv2.contourArea(cnt) > min_contour_area]
    if not valid_contours:
        logger.warning("No valid text regions found.")
        return None

    # Union bounding box over all valid contours.
    x_min, y_min = ori_width, ori_height
    x_max, y_max = 0, 0
    for contour in valid_contours:
        x, y, w, h = cv2.boundingRect(contour)
        x_min = min(x_min, x)
        y_min = min(y_min, y)
        x_max = max(x_max, x + w)
        y_max = max(y_max, y + h)

    # Expand by `padding`, clamped to the image bounds.
    x_min = max(0, x_min - padding)
    y_min = max(0, y_min - padding)
    x_max = min(ori_width, x_max + padding)
    y_max = min(ori_height, y_max + padding)

    cropped_img = original_img[y_min:y_max, x_min:x_max]
    return cropped_img
def postProcessImage(cropped_image):
    """
    Post-process a cropped image: (optionally) deskew, then lightly sharpen.

    Deskewing is currently disabled — the input is passed straight to the
    sharpening stage. The inner ``rotate`` helper is kept so deskewing can be
    re-enabled.

    Args:
        cropped_image (np.ndarray): Cropped image to post-process.

    Returns:
        np.ndarray: The sharpened image.
    """
    def rotate(
        image: np.ndarray, angle: float, background: Union[int, Tuple[int, int, int]]
    ) -> np.ndarray:
        # Rotate `image` by `angle` degrees, enlarging the canvas so nothing
        # is clipped; `background` fills the exposed border.
        # NOTE(review): shape[:2] is (height, width), so these names are
        # swapped; the output-size computation below compensates, matching
        # the `deskew` library's reference example — confirm before editing.
        old_width, old_height = image.shape[:2]
        angle_radian = math.radians(angle)
        width = abs(np.sin(angle_radian) * old_height) + abs(np.cos(angle_radian) * old_width)
        height = abs(np.sin(angle_radian) * old_width) + abs(np.cos(angle_radian) * old_height)
        image_center = tuple(np.array(image.shape[1::-1]) / 2)
        rot_mat = cv2.getRotationMatrix2D(image_center, angle, 1.0)
        rot_mat[1, 2] += (width - old_width) / 2
        rot_mat[0, 2] += (height - old_height) / 2
        return cv2.warpAffine(image, rot_mat, (int(round(height)), int(round(width))), borderValue=background)

    # Deskew disabled; to re-enable:
    #   angle = determine_skew(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY))
    #   rotated = rotate(cropped_image, angle, (0, 0, 0))
    rotated = cropped_image

    # Unsharp mask with reduced intensity.
    # NOTE(review): with ksize (1, 1) the Gaussian blur is an identity, so
    # 1.5*x - 0.5*x == x and this step is effectively a no-op — confirm
    # whether a larger kernel (e.g. (3, 3)) was intended.
    blurred = cv2.GaussianBlur(rotated, (1, 1), sigmaX=3, sigmaY=3)
    sharpened = cv2.addWeighted(rotated, 1.5, blurred, -0.5, 0)
    return sharpened