Update mdr_pdf_parser.py
Browse files - mdr_pdf_parser.py +3 -3
mdr_pdf_parser.py
CHANGED
|
@@ -222,7 +222,7 @@ class MDRTableLayoutParsedFormat(Enum):
|
|
| 222 |
HTML = auto()
|
| 223 |
|
| 224 |
|
| 225 |
-
@dataclass
|
| 226 |
class MDRBaseLayoutElement:
|
| 227 |
"""Base class for layout elements found on a page."""
|
| 228 |
rect: MDRRectangle;
|
|
@@ -1852,7 +1852,7 @@ class MDROcrEngine:
|
|
| 1852 |
# much lower thresholds so we actually get some candidate masks:
|
| 1853 |
det_db_thresh=0.1,
|
| 1854 |
det_db_box_thresh=0.3,
|
| 1855 |
-
drop_score=0.
|
| 1856 |
use_angle_cls=False,
|
| 1857 |
)
|
| 1858 |
try:
|
|
@@ -1906,7 +1906,7 @@ class MDROcrEngine:
|
|
| 1906 |
try:
|
| 1907 |
pts = [(float(p[0]), float(p[1])) for p in box_pts]
|
| 1908 |
if len(pts) == 4:
|
| 1909 |
-
r = MDRRectangle(lt=pts[0], rt=pts[1],
|
| 1910 |
if r.is_valid and r.area > 1:
|
| 1911 |
yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
|
| 1912 |
fragments_generated_count += 1
|
|
|
|
| 222 |
HTML = auto()
|
| 223 |
|
| 224 |
|
| 225 |
+
@dataclass(eq=False)
|
| 226 |
class MDRBaseLayoutElement:
|
| 227 |
"""Base class for layout elements found on a page."""
|
| 228 |
rect: MDRRectangle;
|
|
|
|
| 1852 |
# much lower thresholds so we actually get some candidate masks:
|
| 1853 |
det_db_thresh=0.1,
|
| 1854 |
det_db_box_thresh=0.3,
|
| 1855 |
+
drop_score=0.0,
|
| 1856 |
use_angle_cls=False,
|
| 1857 |
)
|
| 1858 |
try:
|
|
|
|
| 1906 |
try:
|
| 1907 |
pts = [(float(p[0]), float(p[1])) for p in box_pts]
|
| 1908 |
if len(pts) == 4:
|
| 1909 |
+
r = MDRRectangle(lt=pts[0], rt=pts[1], lb=pts[2], rb=pts[3])
|
| 1910 |
if r.is_valid and r.area > 1:
|
| 1911 |
yield MDROcrFragment(order=-1, text=txt, rank=float(conf), rect=r)
|
| 1912 |
fragments_generated_count += 1
|