File size: 5,897 Bytes
4dbe5d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import cv2
import numpy as np
from pdf2image import convert_from_path

from main import RapidOCR
# Single RapidOCR instance (imported from the local `main` module), created
# once at import time and reused for every page processed by this script.
ocr_engine = RapidOCR()

# Root directory that is walked recursively to find the input PDF files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'

from image_enhancement import enhance_image

# Collect the full path of every '*.pdf' file found anywhere under dataPath.
list_pdf = []
for root, dirs, files in os.walk(dataPath):
    for file in files:
        if file.endswith('.pdf'):
            pdf_f = os.path.join(root, file)
            # Sanity check: fails for e.g. a dangling symlink listed by os.walk.
            assert os.path.exists(pdf_f)
            list_pdf.append(pdf_f)
# Sort in place so the PDFs are processed in a deterministic order.
# (A bare `sorted(list_pdf)` builds a new list and would discard it.)
list_pdf.sort()

def adaptive_threshold_to_rgb(image_rgb, block_size=11, c=2):
    """
    Apply adaptive thresholding on the L channel of LAB color space
    and reconstruct the thresholded image as RGB.

    The A and B (color) channels are passed through unchanged; only the
    lightness channel is replaced by its binarized (0 / 255) version.

    Parameters:
        image_rgb (numpy.ndarray): Input RGB image. cv2.adaptiveThreshold
            requires an 8-bit single-channel source, so the image is
            expected to be uint8.
        block_size (int): Side length of the pixel neighbourhood used to
            compute each local threshold; must be odd and greater than 1.
            Defaults to 11.
        c (float): Constant subtracted from the Gaussian-weighted
            neighbourhood mean. Defaults to 2.

    Returns:
        thresholded_rgb (numpy.ndarray): RGB image after thresholding the L channel.
    """
    # Convert RGB to LAB color space and separate lightness from color.
    image_lab = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2LAB)
    l_channel, a_channel, b_channel = cv2.split(image_lab)

    # Binarize the lightness channel against a local, Gaussian-weighted mean.
    thresholded_l = cv2.adaptiveThreshold(
        l_channel,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,  # or ADAPTIVE_THRESH_MEAN_C
        thresholdType=cv2.THRESH_BINARY,
        blockSize=block_size,
        C=c,
    )

    # Recombine the thresholded L channel with the original A and B channels
    # and convert the result back to RGB.
    updated_lab = cv2.merge((thresholded_l, a_channel, b_channel))
    thresholded_rgb = cv2.cvtColor(updated_lab, cv2.COLOR_LAB2RGB)

    return thresholded_rgb

# Settings handed to enhance_image() for every rendered page.
ENHANCE_PARAMETERS = {
    'local_contrast': 1.2,  # 1.2x increase in details
    'mid_tones': 0.5,  # middle of range
    'tonal_width': 0.5,  # middle of range
    'areas_dark': 0.7,  # 70% improvement in dark areas
    'areas_bright': 0.5,  # 50% improvement in bright areas
    'brightness': 0.1,  # slight increase in overall brightness
    'saturation_degree': 1.2,  # 1.2x increase in color saturation
    'preserve_tones': True,
    'color_correction': True,
}

# Number of orientations tried per OCR pass: 0°, 90°, 180° and 270°.
MAX_ROTATIONS = 4


def _report_marker_rows(texts, doc_name):
    """Print the first pair of adjacent OCR rows that both contain 2+ '<'.

    Parameters:
        texts (list): Recognized text of each OCR row, in result order.
        doc_name (str): Label printed ahead of the matching rows.

    Returns:
        bool: True if such a pair was found (and printed), False otherwise.
    """
    for upper, lower in zip(texts, texts[1:]):
        count1 = upper.count("<")
        count2 = lower.count("<")
        # NOTE(review): the message below says "more than 2 times" while the
        # check accepts 2 or more occurrences — confirm which one is intended.
        if count1 > 1 and count2 > 1:
            print(doc_name)
            print("Consecutive rows with '<' more than 2 times each:")
            print(f"Row 1: {upper} (Occurrences: {count1})")
            print(f"Row 2: {lower} (Occurrences: {count2})")
            return True
    return False


def _detect_in_any_rotation(img, doc_name):
    """Run OCR on `img` in up to MAX_ROTATIONS 90° steps, stopping at a hit.

    Parameters:
        img (numpy.ndarray): Image passed to the OCR engine; the caller's
            array is never modified (cv2.rotate returns a new array).
        doc_name (str): Label printed when the marker rows are found.

    Returns:
        bool: True as soon as one orientation yields two consecutive rows
        with 2+ '<' characters each, False if no orientation does.
    """
    for attempt in range(MAX_ROTATIONS):
        result, _elapsed = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)
        if result:
            # Index 1 of each result entry is used as the row's text.
            texts = [r[1] for r in result]
            if _report_marker_rows(texts, doc_name):
                return True
        # Turn the page a further 90° for the next attempt; rotating after
        # the final attempt would be wasted work.
        if attempt < MAX_ROTATIONS - 1:
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    return False


# For each PDF: render its first pages, enhance them, save a JPEG copy and
# look for the '<'-rich consecutive text rows in every 90° orientation.
for pdf_f in list_pdf:
    bs_name = os.path.basename(pdf_f)
    bs_name_0 = os.path.splitext(bs_name)[0]

    # Only pages 1-3 are rendered, at 300 dpi.
    images = convert_from_path(pdf_f, dpi=300, first_page=1, last_page=3)
    for i, image in enumerate(images):
        img = np.array(image)
        print(img.shape)
        # Pass a copy so each page starts from the same settings even if
        # enhance_image() were to modify the dict it receives.
        img = enhance_image(img, dict(ENHANCE_PARAMETERS), verbose=False)
        # NOTE(review): assumes enhance_image returns floats in [0, 1];
        # values outside that range would wrap in the uint8 cast — confirm.
        img = np.uint8(img * 255.)

        # OpenCV writes images in BGR channel order.
        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{bs_name_0}_{i + 1}.jpg', enhanced_img_bgr)
        print(bs_name_0, i)

        # First pass on the enhanced page; if nothing is found in any
        # orientation, retry on an adaptively thresholded version of it.
        if not _detect_in_any_rotation(img, bs_name_0):
            _detect_in_any_rotation(adaptive_threshold_to_rgb(img), bs_name_0)