Spaces:

LinhKL2002
/

App

Build error

App

File size: 4,552 Bytes

4dbe5d1

import os
import cv2
import numpy as np
from pdf2image import convert_from_path

from main import RapidOCR
# Single OCR engine instance, built once at module load and reused for every
# page processed by the loop further down.
ocr_engine = RapidOCR()

# Root directory that is walked recursively (os.walk) to find '.pdf' files.
# NOTE(review): hard-coded absolute path — adjust it for the machine that
# runs this script.
dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'

from image_enhancement import enhance_image

def crop_dynamic(image_rgb):
    """
    Dynamically crop the blank regions (white or black) surrounding the object.

    A pixel is treated as content when its grayscale intensity lies strictly
    between the black threshold (10) and the white threshold (240); near-black
    and near-white pixels are both considered blank. The crop is the tightest
    rectangle containing every content pixel.

    Parameters:
        image_rgb (numpy.ndarray): Input image in RGB format.

    Returns:
        cropped_rgb (numpy.ndarray): Cropped RGB image. If the image holds no
            content pixel at all, the input image is returned uncropped.
        bbox (tuple): Bounding box of the cropped region (x, y, w, h), where
            w and h equal the width and height of ``cropped_rgb``.
    """
    # Convert to grayscale so a single intensity threshold pair can be used.
    gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

    # The two conditions must be AND-ed per pixel: OR-ing "not white" with
    # "not black" is true for every possible intensity, which would keep every
    # row and column and make the crop a no-op.
    content = (gray > 10) & (gray < 240)
    row_idx = np.flatnonzero(content.any(axis=1))
    col_idx = np.flatnonzero(content.any(axis=0))

    # Entirely blank image: nothing to anchor a bounding box on.
    if row_idx.size == 0:
        height, width = gray.shape[:2]
        return image_rgb, (0, 0, width, height)

    y_min, y_max = int(row_idx[0]), int(row_idx[-1])
    x_min, x_max = int(col_idx[0]), int(col_idx[-1])

    # The max indices are inclusive, hence the +1 in both the slice and the
    # reported width/height.
    cropped_rgb = image_rgb[y_min:y_max + 1, x_min:x_max + 1]
    return cropped_rgb, (x_min, y_min, x_max - x_min + 1, y_max - y_min + 1)

# Recursively collect the full path of every '.pdf' file found under dataPath.
list_pdf = []
for root, dirs, files in os.walk(dataPath):
    for file in files:
        if file.endswith('.pdf'):
            pdf_f = os.path.join(root, file)
            # Sanity check: the joined path must point at an existing file.
            assert os.path.exists(pdf_f)
            list_pdf.append(pdf_f)
# sorted() returns a new list and leaves its argument untouched, so calling it
# without using the result did nothing; sort in place to get a deterministic
# processing order.
list_pdf.sort()

# For every collected PDF: rasterise its first pages, enhance each page, save
# the enhanced page as a JPEG in the working directory, then OCR it in up to
# four orientations looking for two consecutive text rows rich in '<'.
for pdf_f in list_pdf:
    bs_name = os.path.basename(pdf_f)
    bs_name_0 = os.path.splitext(bs_name)[0]

    # Only pages 1-3 are rendered, at 500 dpi.
    images = convert_from_path(pdf_f, dpi=500, first_page=1, last_page=3)
    for i, image in enumerate(images):
        # NOTE(review): this array is overwritten before it is ever read, and
        # enhance_image() below is given `image` (the object returned by
        # convert_from_path), not this array — confirm which input is intended.
        img = np.array(image)

        # Settings handed to enhance_image(); rebuilt for every page.
        parameters = {
            'local_contrast': 1.5,     # 1.5x increase in details
            'mid_tones': 0.5,
            'tonal_width': 0.5,
            'areas_dark': 0.7,         # 70% improvement in dark areas
            'areas_bright': 0.5,       # 50% improvement in bright areas
            'saturation_degree': 1.2,  # 1.2x increase in color saturation
            'brightness': 0.1,         # slight increase in brightness
            'preserve_tones': True,
            'color_correction': False,
        }
        img = enhance_image(image, parameters, verbose=False)

        print(img.shape)
        # cv2.imwrite expects BGR channel order, so swap channels before saving.
        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{i + 1}_{bs_name_0}.jpg', enhanced_img_bgr)
        print(bs_name_0, i)

        # Try the current orientation, then each successive 90-degree clockwise
        # turn: at most four OCR passes per page.
        for _attempt in range(4):
            result, _ = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)
            detected = False
            if result:
                # Second field of every OCR entry (presumably the recognised
                # text — verify against the RapidOCR result layout).
                test_list = [entry[1] for entry in result]

                # Compare each row with the one right after it.
                for upper_row, lower_row in zip(test_list, test_list[1:]):
                    count1 = upper_row.count("<")
                    count2 = lower_row.count("<")
                    if count1 > 2 and count2 > 2:
                        print(bs_name_0)
                        print("Consecutive rows with '<' more than 2 times each:")
                        print(f"Row 1: {upper_row} (Occurrences: {count1})")
                        print(f"Row 2: {lower_row} (Occurrences: {count2})")
                        detected = True
                        break

            if detected:
                # A matching pair was found; no further rotation is needed.
                break

            # Nothing matched in this orientation: turn the page and retry.
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)