"""Render PDF pages to images, enhance them, and OCR them at up to four rotations.

For every ``.pdf`` found under ``dataPath`` the first three pages are rendered,
passed through ``enhance_image``, saved as JPEG in the current working
directory, and then fed to the OCR engine.  The page is rotated in 90-degree
steps until two consecutive recognised text rows each contain more than two
``<`` characters, or until four orientations have been tried.
"""
import os

import cv2
import numpy as np
from pdf2image import convert_from_path

from main import RapidOCR
from image_enhancement import enhance_image

ocr_engine = RapidOCR()
dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'


def crop_dynamic(image_rgb):
    """
    Dynamically crop the blank regions (white or black) surrounding the object.

    A pixel counts as content when its grayscale value lies strictly between
    the near-black (10) and near-white (240) thresholds.  The previous
    implementation OR-ed ``gray < 240`` with ``gray > 10`` per row/column,
    which is true for every pixel, so nothing was ever cropped.

    Parameters:
        image_rgb (numpy.ndarray): Input image in RGB format.

    Returns:
        cropped_rgb (numpy.ndarray): Cropped RGB image.  If no content pixel
            is found the input image is returned unchanged.
        bbox (tuple): Bounding box of the cropped region (x, y, w, h).
    """
    gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

    # Content = neither near-white nor near-black.
    # NOTE(review): pure-black ink (<= 10) is treated as blank by this rule;
    # adjust the lower threshold if the border is known to be white only.
    content = (gray > 10) & (gray < 240)
    rows = np.flatnonzero(content.any(axis=1))
    cols = np.flatnonzero(content.any(axis=0))

    if rows.size == 0:
        # Entirely blank image: nothing to anchor a crop on.
        height, width = gray.shape[:2]
        return image_rgb, (0, 0, width, height)

    y_min, y_max = int(rows[0]), int(rows[-1])
    x_min, x_max = int(cols[0]), int(cols[-1])

    cropped_rgb = image_rgb[y_min:y_max + 1, x_min:x_max + 1]
    # Width/height are inclusive of both end pixels, matching the crop above.
    return cropped_rgb, (x_min, y_min, x_max - x_min + 1, y_max - y_min + 1)


def _find_marker_rows(texts):
    """Return the first adjacent pair of rows that each contain more than two '<'.

    Returns ``(row1, count1, row2, count2)`` or ``None`` when no pair matches.
    """
    for upper, lower in zip(texts, texts[1:]):
        upper_count = upper.count("<")
        lower_count = lower.count("<")
        if upper_count > 2 and lower_count > 2:
            return upper, upper_count, lower, lower_count
    return None


# Collect every PDF below dataPath.
list_pdf = []
for root, _dirs, files in os.walk(dataPath):
    for file in files:
        if file.endswith('.pdf'):
            pdf_f = os.path.join(root, file)
            assert os.path.exists(pdf_f)
            list_pdf.append(pdf_f)

# Sort in place; a bare ``sorted(list_pdf)`` would discard its result.
list_pdf.sort()

for pdf_f in list_pdf:
    bs_name = os.path.basename(pdf_f)
    bs_name_0 = os.path.splitext(bs_name)[0]

    # Only the first three pages are rendered (dpi=900 was tried previously).
    images = convert_from_path(pdf_f, dpi=500, first_page=1, last_page=3)

    for i, image in enumerate(images):
        # crop_dynamic() is available but intentionally not applied here.
        parameters = {
            'local_contrast': 1.5,      # 1.5x increase in details
            'mid_tones': 0.5,
            'tonal_width': 0.5,
            'areas_dark': 0.7,          # 70% improvement in dark areas
            'areas_bright': 0.5,        # 50% improvement in bright areas
            'saturation_degree': 1.2,   # 1.2x increase in color saturation
            'brightness': 0.1,          # slight increase in brightness
            'preserve_tones': True,
            'color_correction': False,
        }
        # NOTE(review): the PIL page is passed directly, as in the original;
        # confirm enhance_image accepts PIL input rather than an ndarray.
        img = enhance_image(image, parameters, verbose=False)
        print(img.shape)

        # Save in OpenCV-compatible (BGR) format.
        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{i + 1}_{bs_name_0}.jpg', enhanced_img_bgr)
        print(bs_name_0, i)

        # Try the page at 0, 90, 180 and 270 degrees (clockwise).
        for _ in range(4):
            result, _elapsed = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)

            match = None
            if result:
                # r[1] is used as the recognised text of each OCR row.
                match = _find_marker_rows([r[1] for r in result])

            if match is not None:
                row1, count1, row2, count2 = match
                print(bs_name_0)
                print("Consecutive rows with '<' more than 2 times each:")
                print(f"Row 1: {row1} (Occurrences: {count1})")
                print(f"Row 2: {row2} (Occurrences: {count2})")
                break  # Stop further rotation since rows are detected

            # Nothing found in this orientation: rotate and retry.
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)