Update mdr_pdf_parser.py
Browse files- mdr_pdf_parser.py +7 -5
mdr_pdf_parser.py
CHANGED
|
@@ -1780,10 +1780,12 @@ def mdr_correct_layout_fragments(ocr_engine: 'MDROcrEngine', source_img: Image,
|
|
| 1780 |
|
| 1781 |
# --- MDR OCR Engine ---
|
| 1782 |
|
| 1783 |
-
_MDR_OCR_MODELS = {"det": ("
|
| 1784 |
-
"
|
|
|
|
|
|
|
| 1785 |
|
| 1786 |
-
_MDR_OCR_URL_BASE = "https://
|
| 1787 |
|
| 1788 |
|
| 1789 |
@dataclass
|
|
@@ -1797,7 +1799,7 @@ class _MDR_ONNXParams:
|
|
| 1797 |
|
| 1798 |
# Attributes with default values (Group 1)
|
| 1799 |
use_angle_cls: bool = True
|
| 1800 |
-
rec_image_shape: str = "3,48,
|
| 1801 |
cls_image_shape: str = "3,48,192"
|
| 1802 |
cls_batch_num: int = 6
|
| 1803 |
cls_thresh: float = 0.9
|
|
@@ -1853,7 +1855,7 @@ class MDROcrEngine:
|
|
| 1853 |
det_model_dir=paths["det"],
|
| 1854 |
cls_model_dir=paths["cls"],
|
| 1855 |
rec_model_dir=paths["rec"],
|
| 1856 |
-
rec_char_dict_path=
|
| 1857 |
# much lower thresholds so we actually get some candidate masks:
|
| 1858 |
det_db_thresh=0.1,
|
| 1859 |
det_db_box_thresh=0.3,
|
|
|
|
| 1780 |
|
| 1781 |
# --- MDR OCR Engine ---
|
| 1782 |
|
| 1783 |
+
_MDR_OCR_MODELS = {"det": ("ppocr_onnx", "model", "det_model", "en_PP-OCRv3_det_infer.onnx"),
|
| 1784 |
+
"cls": ("ppocr_onnx", "model", "cls_model", "ch_ppocr_mobile_v2.0_cls_infer.onnx"),
|
| 1785 |
+
"rec": ("ppocr_onnx", "model", "rec_model", "en_PP-OCRv3_rec_infer.onnx"),
|
| 1786 |
+
"keys": ("ppocr_onnx", "ppocr", "utils", "dict", "en_dict.txt")}
|
| 1787 |
|
| 1788 |
+
_MDR_OCR_URL_BASE = "https://raw.githubusercontent.com/Kazuhito00/PaddleOCR-ONNX-Sample/main/"
|
| 1789 |
|
| 1790 |
|
| 1791 |
@dataclass
|
|
|
|
| 1799 |
|
| 1800 |
# Attributes with default values (Group 1)
|
| 1801 |
use_angle_cls: bool = True
|
| 1802 |
+
rec_image_shape: str = "3,48,640"
|
| 1803 |
cls_image_shape: str = "3,48,192"
|
| 1804 |
cls_batch_num: int = 6
|
| 1805 |
cls_thresh: float = 0.9
|
|
|
|
| 1855 |
det_model_dir=paths["det"],
|
| 1856 |
cls_model_dir=paths["cls"],
|
| 1857 |
rec_model_dir=paths["rec"],
|
| 1858 |
+
rec_char_dict_path=None,
|
| 1859 |
# much lower thresholds so we actually get some candidate masks:
|
| 1860 |
det_db_thresh=0.1,
|
| 1861 |
det_db_box_thresh=0.3,
|