Spaces:
Build error
Build error
File size: 4,552 Bytes
4dbe5d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# Dependencies: os for path handling, OpenCV/NumPy for image work,
# pdf2image for rasterizing PDF pages.
import os
import cv2
import numpy as np
from pdf2image import convert_from_path
# RapidOCR is imported from a module named `main`
# (presumably a local checkout of the OCR project -- TODO confirm).
from main import RapidOCR
# One OCR engine instance, created at import time and reused for every page.
ocr_engine = RapidOCR()
# Root directory that is walked recursively for '.pdf' files.
# NOTE(review): hard-coded absolute path; the script only runs where it exists.
dataPath = '/home/tung/Tung_Works/OCR_code/OCR-20250423T073748Z-001/OCR/OCR辨識失敗-部分樣本'
# Image-enhancement helper from the `image_enhancement` module; it is called
# below as enhance_image(image, parameters, verbose=False).
from image_enhancement import enhance_image
def crop_dynamic(image_rgb):
    """
    Dynamically crop the blank regions (white or black) surrounding the object.

    A row or column is treated as blank when it is uniformly near-white (no
    pixel darker than 240) or uniformly near-black (no pixel brighter than
    10). The crop is the smallest rectangle that spans every non-blank row
    and every non-blank column.

    Parameters:
        image_rgb (numpy.ndarray): Input image in RGB format. A
            two-dimensional array is accepted as an already-grayscale image.
    Returns:
        cropped_rgb (numpy.ndarray): Cropped image (a slice of the input).
            The input itself is returned when no non-blank row or column is
            found.
        bbox (tuple): Bounding box of the cropped region (x, y, w, h), where
            w and h equal the width and height of `cropped_rgb`.
    """
    # Convert to grayscale for easier processing; a 2-D input already is one.
    if image_rgb.ndim == 2:
        gray = image_rgb
    else:
        gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

    # A line carries content only if it holds BOTH a non-white and a
    # non-black pixel. OR-ing these two tests (as was done previously) is
    # true for every line, because any pixel value is either < 240 or > 10,
    # so nothing was ever cropped.
    row_mask = np.any(gray < 240, axis=1) & np.any(gray > 10, axis=1)
    col_mask = np.any(gray < 240, axis=0) & np.any(gray > 10, axis=0)

    content_rows = np.flatnonzero(row_mask)
    content_cols = np.flatnonzero(col_mask)

    # Entirely blank image: there is nothing to anchor a crop on, so return
    # the input unchanged instead of failing on an empty index array.
    if content_rows.size == 0 or content_cols.size == 0:
        height, width = gray.shape[:2]
        return image_rgb, (0, 0, width, height)

    y_min, y_max = int(content_rows[0]), int(content_rows[-1])
    x_min, x_max = int(content_cols[0]), int(content_cols[-1])

    # Both ends are inclusive, hence the +1 in the slice and in the bbox size.
    cropped_rgb = image_rgb[y_min:y_max + 1, x_min:x_max + 1]
    return cropped_rgb, (x_min, y_min, x_max - x_min + 1, y_max - y_min + 1)
# Recursively collect the path of every file below dataPath whose name ends
# in '.pdf'.
list_pdf = []
for root, dirs, files in os.walk(dataPath):
    for file in files:
        if file.endswith('.pdf'):
            pdf_f = os.path.join(root, file)
            assert os.path.exists(pdf_f)
            list_pdf.append(pdf_f)
# Sort in place so the PDFs are processed in a deterministic order. The
# previous bare `sorted(list_pdf)` built a sorted copy and discarded it,
# leaving list_pdf in os.walk order.
list_pdf.sort()
# Enhancement settings are the same for every page, so they are defined once.
# Each enhance_image call receives its own copy in case the helper modifies
# the mapping it is given.
parameters = {
    'local_contrast': 1.5,  # 1.5x increase in details
    'mid_tones': 0.5,
    'tonal_width': 0.5,
    'areas_dark': 0.7,  # 70% improvement in dark areas
    'areas_bright': 0.5,  # 50% improvement in bright areas
    'saturation_degree': 1.2,  # 1.2x increase in color saturation
    'brightness': 0.1,  # slight increase in brightness
    'preserve_tones': True,
    'color_correction': False,
}


def _first_consecutive_marker_rows(rows):
    """Return the first two adjacent rows that each contain '<' more than twice.

    Parameters:
        rows (list): Recognized text rows, in the order the OCR returned them.
    Returns:
        tuple or None: (row_1, count_1, row_2, count_2) for the first
        matching pair, or None when no adjacent pair qualifies.
    """
    for upper, lower in zip(rows, rows[1:]):
        upper_count = upper.count("<")
        lower_count = lower.count("<")
        if upper_count > 2 and lower_count > 2:
            return upper, upper_count, lower, lower_count
    return None


# For every PDF: rasterize the first pages, enhance each page, save it as a
# JPEG in the current working directory, then run OCR while rotating the page
# in 90-degree steps until two consecutive '<'-rich text rows are found.
for pdf_f in list_pdf:
    bs_name = os.path.basename(pdf_f)
    bs_name_0 = os.path.splitext(bs_name)[0]
    # Only pages 1-3 are rendered, at 500 dpi.
    images = convert_from_path(pdf_f, dpi=500, first_page=1, last_page=3)
    for i, image in enumerate(images):
        # NOTE(review): the page object from convert_from_path is passed
        # straight to enhance_image (not a NumPy copy of it); the result is
        # used below as an RGB array with a .shape -- confirm against
        # enhance_image's contract.
        img = enhance_image(image, dict(parameters), verbose=False)
        print(img.shape)
        enhanced_img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # Save in OpenCV-compatible format
        cv2.imwrite(f'{i + 1}_{bs_name_0}.jpg', enhanced_img_bgr)
        print(bs_name_0, i )
        # Try the page in up to four orientations (0, 90, 180, 270 degrees).
        for rotation_attempts in range(4):
            result, _ = ocr_engine(img, use_det=True, use_cls=False, use_rec=True)
            match = None
            if result:
                # Element [1] of each OCR entry is used as the row's text.
                test_list = [r[1] for r in result]
                match = _first_consecutive_marker_rows(test_list)
            if match is not None:
                row_1, count1, row_2, count2 = match
                print(bs_name_0)
                print("Consecutive rows with '<' more than 2 times each:")
                print(f"Row 1: {row_1} (Occurrences: {count1})")
                print(f"Row 2: {row_2} (Occurrences: {count2})")
                break  # Stop further rotation since rows are detected
            # Rotate the image by 90 degrees
            img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
|