Moses Paul R committed on
Commit
5aa8146
·
1 Parent(s): 974e8d0

reduce threshold and remove dead code [skip ci]

Browse files
marker/v2/builders/layout.py CHANGED
@@ -110,22 +110,11 @@ class LayoutBuilder(BaseBuilder):
110
  document_page.add_full_block(span)
111
  line.add_structure(span)
112
 
113
- for line_idx in provider_line_idxs.difference(assigned_line_idxs):
114
- line = provider_lines[line_idx]
115
- document_page.add_full_block(line)
116
- text_block = document_page.add_block(Text, polygon=line.polygon)
117
- document_page.add_structure(text_block)
118
- text_block.text_extraction_method = "pdftext"
119
- text_block.add_structure(line)
120
- for span in line_spans[line_idx]:
121
- document_page.add_full_block(span)
122
- text_block.add_structure(span)
123
-
124
  def check_layout_coverage(
125
  self,
126
  document_page: PageGroup,
127
  provider_lines: List[Line],
128
- coverage_threshold=0.6
129
  ):
130
  layout_area = 0
131
  provider_area = 0
 
110
  document_page.add_full_block(span)
111
  line.add_structure(span)
112
 
 
 
 
 
 
 
 
 
 
 
 
113
  def check_layout_coverage(
114
  self,
115
  document_page: PageGroup,
116
  provider_lines: List[Line],
117
+ coverage_threshold=0.5
118
  ):
119
  layout_area = 0
120
  provider_area = 0
marker/v2/converters/pdf.py CHANGED
@@ -29,7 +29,7 @@ class PdfConverter(BaseConverter):
29
  self.detection_model = setup_detection_model()
30
 
31
  def __call__(self, filepath: str, page_range: List[int] | None = None):
32
- pdf_provider = PdfProvider(filepath, {"page_range": page_range})
33
 
34
  layout_builder = LayoutBuilder(self.layout_model)
35
  ocr_builder = OcrBuilder(self.detection_model, self.recognition_model)
 
29
  self.detection_model = setup_detection_model()
30
 
31
  def __call__(self, filepath: str, page_range: List[int] | None = None):
32
+ pdf_provider = PdfProvider(filepath, {"page_range": page_range, "force_ocr": False})
33
 
34
  layout_builder = LayoutBuilder(self.layout_model)
35
  ocr_builder = OcrBuilder(self.detection_model, self.recognition_model)