Spaces:
Build error
Build error
import os

import cv2
import numpy as np
from pdf2image import convert_from_path

from main import RapidOCR
from image_enhancement import enhance_image

# OCR engine used by the rest of the script; built once when the file is run.
ocr_engine = RapidOCR()

# Directory that is searched recursively for the PDF samples to process.
dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'

# Collect the full path of every file below dataPath whose name ends in '.pdf'.
list_pdf = []
for root, dirs, files in os.walk(dataPath):
    for file in files:
        if file.endswith('.pdf'):
            pdf_f = os.path.join(root, file)
            # os.walk also lists dangling symlinks, for which exists() is
            # False.  Fail fast with a real exception: a bare `assert` is
            # removed when Python runs with -O.
            if not os.path.exists(pdf_f):
                raise FileNotFoundError(pdf_f)
            list_pdf.append(pdf_f)

# sorted() returns a new list, so calling it and discarding the result left
# list_pdf in os.walk order.  Sort in place to get a deterministic order.
list_pdf.sort()
def adaptive_threshold_to_rgb(image_rgb, block_size=11, c=2, adaptive_method=None):
    """
    Apply adaptive thresholding on the L channel of LAB color space
    and reconstruct the thresholded image as RGB.

    Only the lightness channel is binarised (to 0 or 255); the A and B
    channels are merged back unchanged before converting to RGB.

    Parameters:
        image_rgb (numpy.ndarray): Input RGB image. It is passed straight to
            cv2.cvtColor / cv2.adaptiveThreshold, so it should be an 8-bit,
            3-channel array (see the OpenCV documentation).
        block_size (int): Side length of the pixel neighbourhood used to
            compute each local threshold. Must be odd and at least 3.
            Defaults to 11.
        c (float): Constant subtracted from the local weighted mean.
            Defaults to 2.
        adaptive_method (int | None): cv2.ADAPTIVE_THRESH_GAUSSIAN_C or
            cv2.ADAPTIVE_THRESH_MEAN_C. None (the default) selects the
            Gaussian method.

    Returns:
        thresholded_rgb (numpy.ndarray): RGB image after thresholding the L channel.

    Raises:
        ValueError: If block_size is even or smaller than 3.
    """
    # Reject an invalid neighbourhood size up front with a clear message
    # instead of an opaque OpenCV assertion failure.
    if block_size < 3 or block_size % 2 == 0:
        raise ValueError(
            f"block_size must be an odd integer >= 3, got {block_size!r}"
        )

    # Resolved here rather than in the signature so the default does not
    # touch cv2 at definition time.
    if adaptive_method is None:
        adaptive_method = cv2.ADAPTIVE_THRESH_GAUSSIAN_C

    # Convert RGB to LAB and separate lightness from the two colour channels.
    image_lab = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2LAB)
    l_channel, a_channel, b_channel = cv2.split(image_lab)

    # Binarise the lightness channel using a per-neighbourhood threshold.
    thresholded_l = cv2.adaptiveThreshold(
        l_channel,
        maxValue=255,
        adaptiveMethod=adaptive_method,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=block_size,
        C=c,
    )

    # Recombine the thresholded L channel with the original A and B channels
    # and convert the result back to RGB.
    updated_lab = cv2.merge((thresholded_l, a_channel, b_channel))
    thresholded_rgb = cv2.cvtColor(updated_lab, cv2.COLOR_LAB2RGB)
    return thresholded_rgb
# Settings handed to enhance_image() for every page.
ENHANCE_PARAMETERS = {
    'local_contrast': 1.2,      # 1.2x increase in details
    'mid_tones': 0.5,           # middle of range
    'tonal_width': 0.5,         # middle of range
    'areas_dark': 0.7,          # 70% improvement in dark areas
    'areas_bright': 0.5,        # 50% improvement in bright areas
    'brightness': 0.1,          # slight increase in overall brightness
    'saturation_degree': 1.2,   # 1.2x increase in color saturation
    'preserve_tones': True,
    'color_correction': True,
}

# Four 90-degree steps cover every orientation and bring the image back to
# where it started.
MAX_ROTATIONS = 4


def _find_consecutive_marker_rows(rows, marker="<", min_count=2):
    """Return the index of the first row that, together with the row after
    it, contains `marker` at least `min_count` times; None if there is none.
    """
    counts = [row.count(marker) for row in rows]
    for j, (first, second) in enumerate(zip(counts, counts[1:])):
        if first >= min_count and second >= min_count:
            return j
    return None


def _detect_with_rotation(img, label):
    """Run OCR on `img`, rotating 90 degrees clockwise after each miss.

    Prints `label` and the two matching rows on the first hit.

    Returns:
        (detected, img): `detected` is True when two consecutive OCR rows
        each contain '<' at least twice. `img` is the image in the
        orientation where that happened, or, after MAX_ROTATIONS misses,
        the image rotated a full turn (its starting orientation).
    """
    for _ in range(MAX_ROTATIONS):
        result, _ = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)
        if result:
            # Element 1 of each OCR entry is used as the recognised text.
            texts = [r[1] for r in result]
            j = _find_consecutive_marker_rows(texts)
            if j is not None:
                print(label)
                # NOTE(review): the wording says "more than 2" but the check
                # is "at least 2"; text kept as-is to preserve the output.
                print("Consecutive rows with '<' more than 2 times each:")
                print(f"Row 1: {texts[j]} (Occurrences: {texts[j].count('<')})")
                print(f"Row 2: {texts[j + 1]} (Occurrences: {texts[j + 1].count('<')})")
                return True, img
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
    return False, img


# For each PDF: render the first three pages, enhance each page, save it as a
# JPEG, then look for two consecutive '<'-rich OCR rows (presumably the
# machine-readable lines of an ID document -- TODO confirm).
for pdf_f in list_pdf:
    bs_name_0 = os.path.splitext(os.path.basename(pdf_f))[0]
    # images = convert_from_path(pdf_f, dpi=900)
    images = convert_from_path(pdf_f, dpi=300, first_page=1, last_page=3)
    for i, image in enumerate(images):
        img = np.array(image)
        print(img.shape)

        # Pass a copy so the shared settings cannot be mutated by the callee.
        img = enhance_image(img, dict(ENHANCE_PARAMETERS), verbose=False)
        # The original scaled by 255, so the output is presumably float in
        # [0, 1] -- TODO confirm. Clipping first keeps any overshoot from
        # casting to uint8 unpredictably; in-range values are unaffected.
        img = np.uint8(np.clip(img, 0.0, 1.0) * 255.)

        # OpenCV writes images in BGR channel order.
        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{bs_name_0}_{i + 1}.jpg', enhanced_img_bgr)
        print(bs_name_0, i)

        # First pass on the enhanced page; on failure retry every
        # orientation on an adaptively thresholded version of it.
        detected, img = _detect_with_rotation(img, bs_name_0)
        if not detected:
            img = adaptive_threshold_to_rgb(img)
            detected, img = _detect_with_rotation(img, bs_name_0)