Spaces:
Running
Running
Update working_yolo_pipeline.py
Browse files- working_yolo_pipeline.py +215 -72
working_yolo_pipeline.py
CHANGED
|
@@ -17,6 +17,30 @@ torch.load = patched_torch_load
|
|
| 17 |
|
| 18 |
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
import json
|
| 21 |
import argparse
|
| 22 |
import os
|
|
@@ -511,10 +535,71 @@ def calculate_x_gutters(word_data: list, params: Dict, page_height: float) -> Li
|
|
| 511 |
|
| 512 |
return sorted(final_separators)
|
| 513 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 514 |
|
| 515 |
def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
| 516 |
top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
|
| 517 |
-
"""Extract word data with OCR caching to avoid redundant Tesseract runs."""
|
| 518 |
word_data = page.get_text("words")
|
| 519 |
|
| 520 |
if len(word_data) > 0:
|
|
@@ -524,45 +609,40 @@ def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
|
| 524 |
word_data = _ocr_cache.get_ocr(pdf_path, page_num)
|
| 525 |
else:
|
| 526 |
try:
|
| 527 |
-
#
|
| 528 |
-
# 1. Render at Higher Resolution (Zoom 4.0 = ~300 DPI)
|
| 529 |
zoom_level = 4.0
|
| 530 |
pix = page.get_pixmap(matrix=fitz.Matrix(zoom_level, zoom_level))
|
| 531 |
-
|
| 532 |
-
# 2. Convert directly to OpenCV format (Faster than PIL)
|
| 533 |
img_np = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
# 3. Apply Preprocessing (Thresholding)
|
| 540 |
-
processed_img = preprocess_image_for_ocr(img_np)
|
| 541 |
-
|
| 542 |
-
# 4. Optimized Tesseract Config
|
| 543 |
-
# --psm 6: Assume a single uniform block of text (Great for columns/questions)
|
| 544 |
-
# --oem 3: Default engine (LSTM)
|
| 545 |
-
custom_config = r'--oem 3 --psm 6'
|
| 546 |
|
| 547 |
-
|
| 548 |
-
|
|
|
|
|
|
|
| 549 |
|
| 550 |
full_word_data = []
|
| 551 |
-
|
| 552 |
-
text
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 560 |
|
| 561 |
word_data = full_word_data
|
| 562 |
_ocr_cache.set_ocr(pdf_path, page_num, word_data)
|
| 563 |
-
# --- OPTIMIZATION END ---
|
| 564 |
except Exception as e:
|
| 565 |
-
print(f" ❌
|
| 566 |
return []
|
| 567 |
|
| 568 |
# Apply margin filtering
|
|
@@ -572,6 +652,17 @@ def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
|
| 572 |
return [d for d in word_data if d[2] >= y_min and d[4] <= y_max]
|
| 573 |
|
| 574 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 575 |
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
|
| 576 |
img_data = pix.samples
|
| 577 |
img = np.frombuffer(img_data, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
|
@@ -992,58 +1083,110 @@ def preprocess_and_ocr_page(original_img: np.ndarray, model, pdf_path: str,
|
|
| 992 |
})
|
| 993 |
else:
|
| 994 |
# === START OF OPTIMIZED OCR BLOCK ===
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
try:
|
| 996 |
-
# 1. Re-render Page at High Resolution (Zoom 4.0
|
| 997 |
ocr_zoom = 4.0
|
| 998 |
pix_ocr = fitz_page.get_pixmap(matrix=fitz.Matrix(ocr_zoom, ocr_zoom))
|
| 999 |
|
| 1000 |
-
# Convert PyMuPDF Pixmap to OpenCV format
|
| 1001 |
-
img_ocr_np = np.frombuffer(pix_ocr.samples, dtype=np.uint8).reshape(
|
| 1002 |
-
|
|
|
|
| 1003 |
if pix_ocr.n == 3:
|
| 1004 |
img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGB2BGR)
|
| 1005 |
elif pix_ocr.n == 4:
|
| 1006 |
img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
|
| 1007 |
|
| 1008 |
-
# 2.
|
| 1009 |
-
|
| 1010 |
-
|
| 1011 |
-
|
| 1012 |
-
|
| 1013 |
-
|
| 1014 |
-
|
| 1015 |
-
|
| 1016 |
-
|
| 1017 |
-
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
|
| 1035 |
-
|
| 1036 |
-
|
| 1037 |
-
raw_ocr_output.append({
|
| 1038 |
-
'type': 'text',
|
| 1039 |
-
'word': cleaned_text, # Use the sanitized word
|
| 1040 |
-
'confidence': float(hocr_data['conf'][i]),
|
| 1041 |
-
'bbox': [x1, y1, x2, y2],
|
| 1042 |
-
'y0': y1,
|
| 1043 |
-
'x0': x1
|
| 1044 |
-
})
|
| 1045 |
except Exception as e:
|
| 1046 |
-
print(f" ❌
|
|
|
|
|
|
|
| 1047 |
# === END OF OPTIMIZED OCR BLOCK ===
|
| 1048 |
|
| 1049 |
# ====================================================================
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
#==================================================================================
|
| 23 |
+
#RAPID OCR
|
| 24 |
+
#==================================================================================
|
| 25 |
+
|
| 26 |
+
from rapidocr import RapidOCR, OCRVersion
|
| 27 |
+
|
| 28 |
+
# Initialize RapidOCR (v5 is generally the most accurate current version)
|
| 29 |
+
# We use return_word_box=True to get word-level precision similar to Tesseract's image_to_data
|
| 30 |
+
ocr_engine = RapidOCR(params={
|
| 31 |
+
"Det.ocr_version": OCRVersion.PPOCRV5,
|
| 32 |
+
"Rec.ocr_version": OCRVersion.PPOCRV5,
|
| 33 |
+
"return_word_box": True
|
| 34 |
+
})
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
#==================================================================================
|
| 39 |
+
#RAPID OCR
|
| 40 |
+
#==================================================================================
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
|
| 44 |
import json
|
| 45 |
import argparse
|
| 46 |
import os
|
|
|
|
| 535 |
|
| 536 |
return sorted(final_separators)
|
| 537 |
|
| 538 |
+
#======================================================================================================================================
|
| 539 |
+
# def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
| 540 |
+
# top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
|
| 541 |
+
# """Extract word data with OCR caching to avoid redundant Tesseract runs."""
|
| 542 |
+
# word_data = page.get_text("words")
|
| 543 |
+
|
| 544 |
+
# if len(word_data) > 0:
|
| 545 |
+
# word_data = [(w[4], w[0], w[1], w[2], w[3]) for w in word_data]
|
| 546 |
+
# else:
|
| 547 |
+
# if _ocr_cache.has_ocr(pdf_path, page_num):
|
| 548 |
+
# word_data = _ocr_cache.get_ocr(pdf_path, page_num)
|
| 549 |
+
# else:
|
| 550 |
+
# try:
|
| 551 |
+
# # --- OPTIMIZATION START ---
|
| 552 |
+
# # 1. Render at Higher Resolution (Zoom 4.0 = ~300 DPI)
|
| 553 |
+
# zoom_level = 4.0
|
| 554 |
+
# pix = page.get_pixmap(matrix=fitz.Matrix(zoom_level, zoom_level))
|
| 555 |
+
|
| 556 |
+
# # 2. Convert directly to OpenCV format (Faster than PIL)
|
| 557 |
+
# img_np = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
| 558 |
+
# if pix.n == 3:
|
| 559 |
+
# img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
|
| 560 |
+
# elif pix.n == 4:
|
| 561 |
+
# img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
|
| 562 |
+
|
| 563 |
+
# # 3. Apply Preprocessing (Thresholding)
|
| 564 |
+
# processed_img = preprocess_image_for_ocr(img_np)
|
| 565 |
+
|
| 566 |
+
# # 4. Optimized Tesseract Config
|
| 567 |
+
# # --psm 6: Assume a single uniform block of text (Great for columns/questions)
|
| 568 |
+
# # --oem 3: Default engine (LSTM)
|
| 569 |
+
# custom_config = r'--oem 3 --psm 6'
|
| 570 |
+
|
| 571 |
+
# data = pytesseract.image_to_data(processed_img, output_type=pytesseract.Output.DICT,
|
| 572 |
+
# config=custom_config)
|
| 573 |
+
|
| 574 |
+
# full_word_data = []
|
| 575 |
+
# for i in range(len(data['level'])):
|
| 576 |
+
# text = data['text'][i].strip()
|
| 577 |
+
# if text:
|
| 578 |
+
# # Scale coordinates back to PDF points
|
| 579 |
+
# x1 = data['left'][i] / zoom_level
|
| 580 |
+
# y1 = data['top'][i] / zoom_level
|
| 581 |
+
# x2 = (data['left'][i] + data['width'][i]) / zoom_level
|
| 582 |
+
# y2 = (data['top'][i] + data['height'][i]) / zoom_level
|
| 583 |
+
# full_word_data.append((text, x1, y1, x2, y2))
|
| 584 |
+
|
| 585 |
+
# word_data = full_word_data
|
| 586 |
+
# _ocr_cache.set_ocr(pdf_path, page_num, word_data)
|
| 587 |
+
# # --- OPTIMIZATION END ---
|
| 588 |
+
# except Exception as e:
|
| 589 |
+
# print(f" ❌ OCR Error in detection phase: {e}")
|
| 590 |
+
# return []
|
| 591 |
+
|
| 592 |
+
# # Apply margin filtering
|
| 593 |
+
# page_height = page.rect.height
|
| 594 |
+
# y_min = page_height * top_margin_percent
|
| 595 |
+
# y_max = page_height * (1 - bottom_margin_percent)
|
| 596 |
+
# return [d for d in word_data if d[2] >= y_min and d[4] <= y_max]
|
| 597 |
+
|
| 598 |
+
#============================================================================================================
|
| 599 |
+
|
| 600 |
|
| 601 |
def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
| 602 |
top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
|
|
|
|
| 603 |
word_data = page.get_text("words")
|
| 604 |
|
| 605 |
if len(word_data) > 0:
|
|
|
|
| 609 |
word_data = _ocr_cache.get_ocr(pdf_path, page_num)
|
| 610 |
else:
|
| 611 |
try:
|
| 612 |
+
# 1. Render at Higher Resolution
|
|
|
|
| 613 |
zoom_level = 4.0
|
| 614 |
pix = page.get_pixmap(matrix=fitz.Matrix(zoom_level, zoom_level))
|
|
|
|
|
|
|
| 615 |
img_np = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
| 616 |
+
|
| 617 |
+
# Convert to BGR for RapidOCR
|
| 618 |
+
if pix.n == 3: img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
|
| 619 |
+
elif pix.n == 4: img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
|
| 621 |
+
# 2. Run RapidOCR
|
| 622 |
+
# RapidOCR returns: [[box, text, score], ...]
|
| 623 |
+
# where box is [[x1,y1], [x2,y2], [x3,y3], [x4,y4]]
|
| 624 |
+
results, _ = ocr_engine(img_np)
|
| 625 |
|
| 626 |
full_word_data = []
|
| 627 |
+
if results:
|
| 628 |
+
for box, text, score in results:
|
| 629 |
+
text = text.strip()
|
| 630 |
+
if text:
|
| 631 |
+
# 3. Convert Polygon to BBox and Scale back to PDF points
|
| 632 |
+
xs = [p[0] for p in box]
|
| 633 |
+
ys = [p[1] for p in box]
|
| 634 |
+
|
| 635 |
+
x1 = min(xs) / zoom_level
|
| 636 |
+
y1 = min(ys) / zoom_level
|
| 637 |
+
x2 = max(xs) / zoom_level
|
| 638 |
+
y2 = max(ys) / zoom_level
|
| 639 |
+
|
| 640 |
+
full_word_data.append((text, x1, y1, x2, y2))
|
| 641 |
|
| 642 |
word_data = full_word_data
|
| 643 |
_ocr_cache.set_ocr(pdf_path, page_num, word_data)
|
|
|
|
| 644 |
except Exception as e:
|
| 645 |
+
print(f" ❌ RapidOCR Error in detection phase: {e}")
|
| 646 |
return []
|
| 647 |
|
| 648 |
# Apply margin filtering
|
|
|
|
| 652 |
return [d for d in word_data if d[2] >= y_min and d[4] <= y_max]
|
| 653 |
|
| 654 |
|
| 655 |
+
|
| 656 |
+
#=========================================================================================================================================
|
| 657 |
+
#=============================================================================================================================================
|
| 658 |
+
|
| 659 |
+
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
|
| 663 |
+
|
| 664 |
+
|
| 665 |
+
|
| 666 |
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
|
| 667 |
img_data = pix.samples
|
| 668 |
img = np.frombuffer(img_data, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
|
|
|
| 1083 |
})
|
| 1084 |
else:
|
| 1085 |
# === START OF OPTIMIZED OCR BLOCK ===
|
| 1086 |
+
# try:
|
| 1087 |
+
# # 1. Re-render Page at High Resolution (Zoom 4.0 = ~300 DPI)
|
| 1088 |
+
# ocr_zoom = 4.0
|
| 1089 |
+
# pix_ocr = fitz_page.get_pixmap(matrix=fitz.Matrix(ocr_zoom, ocr_zoom))
|
| 1090 |
+
|
| 1091 |
+
# # Convert PyMuPDF Pixmap to OpenCV format
|
| 1092 |
+
# img_ocr_np = np.frombuffer(pix_ocr.samples, dtype=np.uint8).reshape(pix_ocr.height, pix_ocr.width,
|
| 1093 |
+
# pix_ocr.n)
|
| 1094 |
+
# if pix_ocr.n == 3:
|
| 1095 |
+
# img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGB2BGR)
|
| 1096 |
+
# elif pix_ocr.n == 4:
|
| 1097 |
+
# img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
|
| 1098 |
+
|
| 1099 |
+
# # 2. Preprocess (Binarization)
|
| 1100 |
+
# processed_img = preprocess_image_for_ocr(img_ocr_np)
|
| 1101 |
+
|
| 1102 |
+
# # 3. Run Tesseract with Optimized Configuration
|
| 1103 |
+
# custom_config = r'--oem 3 --psm 6'
|
| 1104 |
+
|
| 1105 |
+
# hocr_data = pytesseract.image_to_data(
|
| 1106 |
+
# processed_img,
|
| 1107 |
+
# output_type=pytesseract.Output.DICT,
|
| 1108 |
+
# config=custom_config
|
| 1109 |
+
# )
|
| 1110 |
+
|
| 1111 |
+
# for i in range(len(hocr_data['level'])):
|
| 1112 |
+
# text = hocr_data['text'][i] # Retrieve raw Tesseract text
|
| 1113 |
+
|
| 1114 |
+
# # --- FIX: SANITIZE TEXT AND THEN STRIP ---
|
| 1115 |
+
# cleaned_text = sanitize_text(text).strip()
|
| 1116 |
+
|
| 1117 |
+
# if cleaned_text and hocr_data['conf'][i] > -1:
|
| 1118 |
+
# # 4. Coordinate Mapping
|
| 1119 |
+
# scale_adjustment = scale_factor / ocr_zoom
|
| 1120 |
+
|
| 1121 |
+
# x1 = int(hocr_data['left'][i] * scale_adjustment)
|
| 1122 |
+
# y1 = int(hocr_data['top'][i] * scale_adjustment)
|
| 1123 |
+
# w = int(hocr_data['width'][i] * scale_adjustment)
|
| 1124 |
+
# h = int(hocr_data['height'][i] * scale_adjustment)
|
| 1125 |
+
# x2 = x1 + w
|
| 1126 |
+
# y2 = y1 + h
|
| 1127 |
+
|
| 1128 |
+
# raw_ocr_output.append({
|
| 1129 |
+
# 'type': 'text',
|
| 1130 |
+
# 'word': cleaned_text, # Use the sanitized word
|
| 1131 |
+
# 'confidence': float(hocr_data['conf'][i]),
|
| 1132 |
+
# 'bbox': [x1, y1, x2, y2],
|
| 1133 |
+
# 'y0': y1,
|
| 1134 |
+
# 'x0': x1
|
| 1135 |
+
# })
|
| 1136 |
+
# except Exception as e:
|
| 1137 |
+
|
| 1138 |
+
# print(f" ❌ Tesseract OCR Error: {e}")
|
| 1139 |
+
#=============================================================================================================================================================
|
| 1140 |
+
#=============================================================================================================================================================
|
| 1141 |
+
else:
|
| 1142 |
+
# === START OF RAPIDOCR BLOCK ===
|
| 1143 |
try:
|
| 1144 |
+
# 1. Re-render Page at High Resolution (Standardizing to Zoom 4.0)
|
| 1145 |
ocr_zoom = 4.0
|
| 1146 |
pix_ocr = fitz_page.get_pixmap(matrix=fitz.Matrix(ocr_zoom, ocr_zoom))
|
| 1147 |
|
| 1148 |
+
# Convert PyMuPDF Pixmap to OpenCV format (BGR)
|
| 1149 |
+
img_ocr_np = np.frombuffer(pix_ocr.samples, dtype=np.uint8).reshape(
|
| 1150 |
+
pix_ocr.height, pix_ocr.width, pix_ocr.n
|
| 1151 |
+
)
|
| 1152 |
if pix_ocr.n == 3:
|
| 1153 |
img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGB2BGR)
|
| 1154 |
elif pix_ocr.n == 4:
|
| 1155 |
img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
|
| 1156 |
|
| 1157 |
+
# 2. Run RapidOCR (Models handle preprocessing internally)
|
| 1158 |
+
results, _ = ocr_engine(img_ocr_np)
|
| 1159 |
+
|
| 1160 |
+
if results:
|
| 1161 |
+
# Calculate scaling from OCR image (4.0) to your pipeline standard (scale_factor=2.0)
|
| 1162 |
+
scale_adjustment = scale_factor / ocr_zoom
|
| 1163 |
+
|
| 1164 |
+
for box, text, score in results:
|
| 1165 |
+
# Sanitize and clean text
|
| 1166 |
+
cleaned_text = sanitize_text(text).strip()
|
| 1167 |
+
|
| 1168 |
+
if cleaned_text:
|
| 1169 |
+
# 3. Coordinate Mapping (Convert 4-point polygon to x1, y1, x2, y2)
|
| 1170 |
+
xs = [p[0] for p in box]
|
| 1171 |
+
ys = [p[1] for p in box]
|
| 1172 |
+
|
| 1173 |
+
x1 = int(min(xs) * scale_adjustment)
|
| 1174 |
+
y1 = int(min(ys) * scale_adjustment)
|
| 1175 |
+
x2 = int(max(xs) * scale_adjustment)
|
| 1176 |
+
y2 = int(max(ys) * scale_adjustment)
|
| 1177 |
+
|
| 1178 |
+
raw_ocr_output.append({
|
| 1179 |
+
'type': 'text',
|
| 1180 |
+
'word': cleaned_text,
|
| 1181 |
+
'confidence': float(score) * 100, # Converting 0-1.0 to 0-100 scale
|
| 1182 |
+
'bbox': [x1, y1, x2, y2],
|
| 1183 |
+
'y0': y1,
|
| 1184 |
+
'x0': x1
|
| 1185 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1186 |
except Exception as e:
|
| 1187 |
+
print(f" ❌ RapidOCR Fallback Error: {e}")
|
| 1188 |
+
# === END OF RAPIDOCR BLOCK ====================================================================================================================================
|
| 1189 |
+
#===========================================================================================================================================================================
|
| 1190 |
# === END OF OPTIMIZED OCR BLOCK ===
|
| 1191 |
|
| 1192 |
# ====================================================================
|