Spaces:
Build error
Build error
File size: 5,897 Bytes
4dbe5d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import os
import cv2
import numpy as np
from pdf2image import convert_from_path
from main import RapidOCR
# Single OCR engine instance, created once at import time and called for every
# page image processed further down in this script.
ocr_engine = RapidOCR()
# Root directory that os.walk() scans recursively for input PDFs.
# NOTE(review): machine-specific absolute path; the last path component reads
# roughly "OCR recognition failures - partial samples".
dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'
from image_enhancement import enhance_image
# Collect the full path of every file ending in '.pdf' found anywhere beneath
# dataPath (the match is case-sensitive, so '.PDF' is not picked up).
list_pdf = []
for root, dirs, files in os.walk(dataPath):
    for file in files:
        if file.endswith('.pdf'):
            pdf_f = os.path.join(root, file)
            # Sanity check: a name listed by os.walk() can still fail this
            # test when it is a dangling symlink.
            assert os.path.exists(pdf_f)
            list_pdf.append(pdf_f)
# Sort in place so the PDFs are processed in a deterministic order.
# The previous bare `sorted(list_pdf)` built a sorted copy and discarded it,
# leaving list_pdf in whatever order os.walk() happened to yield.
list_pdf.sort()
def adaptive_threshold_to_rgb(image_rgb, *, block_size=11, c=2,
                              adaptive_method=None):
    """
    Binarize the lightness of an RGB image with adaptive thresholding.

    The image is converted to LAB, the L channel is replaced by its
    adaptively thresholded version (every value becomes 0 or 255), the
    original A and B channels are kept, and the result is converted back
    to RGB.

    Parameters:
        image_rgb (numpy.ndarray): Input RGB image. Expected to be an 8-bit,
            3-channel array, because cv2.adaptiveThreshold only accepts
            8-bit single-channel input for the extracted L channel.
        block_size (int): Side length of the pixel neighbourhood used to
            compute each local threshold; OpenCV requires an odd value
            greater than 1. Defaults to 11.
        c (float): Constant subtracted from the locally computed mean.
            Defaults to 2.
        adaptive_method (int | None): OpenCV adaptive method flag, e.g.
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C or cv2.ADAPTIVE_THRESH_MEAN_C.
            None (the default) selects cv2.ADAPTIVE_THRESH_GAUSSIAN_C.

    Returns:
        thresholded_rgb (numpy.ndarray): RGB image after thresholding the
            L channel.
    """
    # Compare against None explicitly: cv2.ADAPTIVE_THRESH_MEAN_C is 0 and
    # would be mistaken for "not provided" by a plain truthiness test.
    if adaptive_method is None:
        adaptive_method = cv2.ADAPTIVE_THRESH_GAUSSIAN_C

    # Convert RGB to LAB so lightness can be processed separately from colour
    image_lab = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2LAB)

    # Split LAB channels
    l_channel, a_channel, b_channel = cv2.split(image_lab)

    # Apply adaptive thresholding to the L channel only
    thresholded_l = cv2.adaptiveThreshold(
        l_channel,
        maxValue=255,
        adaptiveMethod=adaptive_method,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=block_size,
        C=c,
    )

    # Merge thresholded L channel back with original A and B channels
    updated_lab = cv2.merge((thresholded_l, a_channel, b_channel))

    # Convert LAB back to RGB
    thresholded_rgb = cv2.cvtColor(updated_lab, cv2.COLOR_LAB2RGB)
    return thresholded_rgb
# Settings handed to enhance_image() for every page. Defined once here because
# they never change between pages; each call receives its own copy.
ENHANCE_PARAMETERS = {
    'local_contrast': 1.2,     # 1.2x increase in details
    'mid_tones': 0.5,          # middle of range
    'tonal_width': 0.5,        # middle of range
    'areas_dark': 0.7,         # 70% improvement in dark areas
    'areas_bright': 0.5,       # 50% improvement in bright areas
    'brightness': 0.1,         # slight increase in overall brightness
    'saturation_degree': 1.2,  # 1.2x increase in color saturation
    'preserve_tones': True,
    'color_correction': True,
}


def _has_consecutive_marker_rows(rows, label):
    """Report whether two adjacent OCR text rows both contain several '<'.

    Parameters:
        rows (list[str]): Recognized text rows, in the order the OCR engine
            returned them.
        label (str): Name printed ahead of the match report (the PDF's base
            name in this script).

    Returns:
        bool: True as soon as a pair of neighbouring rows each contains '<'
            at least twice (the pair is printed); False when no such pair
            exists.
    """
    for first, second in zip(rows, rows[1:]):
        count1 = first.count("<")
        count2 = second.count("<")
        if count1 > 1 and count2 > 1:
            print(label)
            # NOTE(review): the message says "more than 2 times" although the
            # condition accepts 2 or more; text kept unchanged on purpose.
            print("Consecutive rows with '<' more than 2 times each:")
            print(f"Row 1: {first} (Occurrences: {count1})")
            print(f"Row 2: {second} (Occurrences: {count2})")
            return True
    return False


def _detect_with_rotation(img, label, max_attempts=4):
    """Run OCR on `img` at successive 90-degree rotations until rows match.

    OCR is attempted on the image as given and, after each failure, on the
    image rotated a further 90 degrees clockwise, for at most `max_attempts`
    attempts in total.

    Parameters:
        img (numpy.ndarray): Image passed to the module-level `ocr_engine`.
        label (str): Name printed when a match is found.
        max_attempts (int): Maximum number of OCR attempts. With the default
            of 4 the orientations tried are 0, 90, 180 and 270 degrees.

    Returns:
        tuple[bool, numpy.ndarray]: Whether a matching row pair was found,
            and the image in the orientation of the successful attempt. When
            nothing matched, the image has been rotated `max_attempts` times,
            which for the default of 4 is the original orientation again.
    """
    detected = False
    for _attempt in range(max_attempts):
        result, _ = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)
        # `result` is falsy when the engine recognized nothing.
        if result:
            # Index 1 of each result entry is used as the row's text.
            detected = _has_consecutive_marker_rows(
                [r[1] for r in result], label
            )
        if detected:
            break  # Stop further rotation since rows are detected
        # Rotate the image by 90 degrees and try again
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    return detected, img


for pdf_f in list_pdf:
    bs_name = os.path.basename(pdf_f)
    bs_name_0 = os.path.splitext(bs_name)[0]
    # Only the first three pages of each PDF are rasterized, at 300 dpi.
    images = convert_from_path(pdf_f, dpi=300, first_page=1, last_page=3)
    for i, image in enumerate(images):
        img = np.array(image)
        print(img.shape)

        img = enhance_image(img, dict(ENHANCE_PARAMETERS), verbose=False)
        # Presumably enhance_image returns floats in [0, 1] - TODO confirm.
        # Clip before the cast so a slightly out-of-range value saturates
        # instead of wrapping around in uint8.
        img = np.uint8(np.clip(img * 255., 0, 255))

        # Save in OpenCV-compatible (BGR) format, into the working directory.
        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{bs_name_0}_{i + 1}.jpg', enhanced_img_bgr)
        print(bs_name_0, i)

        # First pass: OCR the enhanced page at every orientation.
        detected, img = _detect_with_rotation(img, bs_name_0)
        if not detected:
            # Second pass: retry on the adaptively thresholded page.
            img = adaptive_threshold_to_rgb(img)
            detected, img = _detect_with_rotation(img, bs_name_0)
|