peppermenta committed on
Commit
870d666
·
1 Parent(s): 010ba5d

Remove inline model stuff from tests

Browse files
tests/builders/test_blank_page.py CHANGED
@@ -5,9 +5,9 @@ from marker.builders.layout import LayoutBuilder
5
  from marker.builders.line import LineBuilder
6
 
7
 
8
- def test_blank_page(config, doc_provider, layout_model, ocr_error_model, recognition_model, detection_model, inline_detection_model):
9
  layout_builder = LayoutBuilder(layout_model, config)
10
- line_builder = LineBuilder(detection_model, inline_detection_model, ocr_error_model)
11
  builder = DocumentBuilder(config)
12
  document = builder.build_document(doc_provider)
13
 
 
5
  from marker.builders.line import LineBuilder
6
 
7
 
8
+ def test_blank_page(config, doc_provider, layout_model, ocr_error_model, detection_model):
9
  layout_builder = LayoutBuilder(layout_model, config)
10
+ line_builder = LineBuilder(detection_model, ocr_error_model)
11
  builder = DocumentBuilder(config)
12
  document = builder.build_document(doc_provider)
13
 
tests/builders/test_garbled_pdf.py CHANGED
@@ -30,8 +30,8 @@ def test_garbled_pdf(pdf_document, detection_model, recognition_model, table_rec
30
 
31
  @pytest.mark.filename("hindi_judgement.pdf")
32
  @pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
33
- def test_garbled_builder(config, doc_provider, detection_model, inline_detection_model, ocr_error_model):
34
- line_builder = LineBuilder(detection_model, inline_detection_model, ocr_error_model, config)
35
  builder = DocumentBuilder(config)
36
  document = builder.build_document(doc_provider)
37
 
@@ -42,8 +42,8 @@ def test_garbled_builder(config, doc_provider, detection_model, inline_detection
42
 
43
  @pytest.mark.filename("adversarial.pdf")
44
  @pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
45
- def test_nongarbled_builder(config, doc_provider, detection_model, inline_detection_model, ocr_error_model):
46
- line_builder = LineBuilder(detection_model, inline_detection_model, ocr_error_model, config)
47
  builder = DocumentBuilder(config)
48
  document = builder.build_document(doc_provider)
49
 
 
30
 
31
  @pytest.mark.filename("hindi_judgement.pdf")
32
  @pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
33
+ def test_garbled_builder(config, doc_provider, detection_model, ocr_error_model):
34
+ line_builder = LineBuilder(detection_model, ocr_error_model, config)
35
  builder = DocumentBuilder(config)
36
  document = builder.build_document(doc_provider)
37
 
 
42
 
43
  @pytest.mark.filename("adversarial.pdf")
44
  @pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
45
+ def test_nongarbled_builder(config, doc_provider, detection_model, ocr_error_model):
46
+ line_builder = LineBuilder(detection_model, ocr_error_model, config)
47
  builder = DocumentBuilder(config)
48
  document = builder.build_document(doc_provider)
49
 
tests/builders/test_inline_math_lines.py CHANGED
@@ -3,6 +3,7 @@ import pytest
3
  from marker.processors.line_merge import LineMergeProcessor
4
  from marker.schema import BlockTypes
5
 
 
6
  @pytest.mark.config({"page_range": [1]})
7
  def test_inline_box_nomerging(pdf_document, config):
8
  first_page = pdf_document.pages[0]
@@ -17,6 +18,7 @@ def test_inline_box_nomerging(pdf_document, config):
17
  assert line_count == 46
18
 
19
 
 
20
  @pytest.mark.config({"page_range": [1], "use_llm": True})
21
  def test_inline_box_merging(pdf_document, config):
22
  first_page = pdf_document.pages[0]
 
3
  from marker.processors.line_merge import LineMergeProcessor
4
  from marker.schema import BlockTypes
5
 
6
+ @pytest.mark.skip("We do not support this functionality anymore")
7
  @pytest.mark.config({"page_range": [1]})
8
  def test_inline_box_nomerging(pdf_document, config):
9
  first_page = pdf_document.pages[0]
 
18
  assert line_count == 46
19
 
20
 
21
+ @pytest.mark.skip("We do not support this functionality anymore")
22
  @pytest.mark.config({"page_range": [1], "use_llm": True})
23
  def test_inline_box_merging(pdf_document, config):
24
  first_page = pdf_document.pages[0]
tests/builders/test_layout_replace.py CHANGED
@@ -9,10 +9,10 @@ from marker.schema.registry import get_block_class
9
 
10
 
11
  @pytest.mark.config({"page_range": [0]})
12
- def test_layout_replace(request, config, doc_provider, layout_model, ocr_error_model, detection_model, inline_detection_model):
13
  # The llm layout builder replaces blocks - this makes sure text is still merged properly
14
  layout_builder = LayoutBuilder(layout_model, config)
15
- line_builder = LineBuilder(detection_model, inline_detection_model, ocr_error_model, config)
16
  builder = DocumentBuilder(config)
17
  document = builder.build_document(doc_provider)
18
  layout_builder(document, doc_provider)
 
9
 
10
 
11
  @pytest.mark.config({"page_range": [0]})
12
+ def test_layout_replace(request, config, doc_provider, layout_model, ocr_error_model, detection_model):
13
  # The llm layout builder replaces blocks - this makes sure text is still merged properly
14
  layout_builder = LayoutBuilder(layout_model, config)
15
+ line_builder = LineBuilder(detection_model, ocr_error_model, config)
16
  builder = DocumentBuilder(config)
17
  document = builder.build_document(doc_provider)
18
  layout_builder(document, doc_provider)
tests/conftest.py CHANGED
@@ -54,10 +54,6 @@ def table_rec_model(model_dict):
54
  def ocr_error_model(model_dict):
55
  yield model_dict["ocr_error_model"]
56
 
57
- @pytest.fixture(scope="session")
58
- def inline_detection_model(model_dict):
59
- yield model_dict["inline_detection_model"]
60
-
61
  @pytest.fixture(scope="function")
62
  def config(request):
63
  config_mark = request.node.get_closest_marker("config")
@@ -93,9 +89,9 @@ def doc_provider(request, config, temp_doc):
93
  yield provider_cls(temp_doc.name, config)
94
 
95
  @pytest.fixture(scope="function")
96
- def pdf_document(request, config, doc_provider, layout_model, ocr_error_model, recognition_model, detection_model, inline_detection_model):
97
  layout_builder = LayoutBuilder(layout_model, config)
98
- line_builder = LineBuilder(detection_model, inline_detection_model, ocr_error_model, config)
99
  ocr_builder = OcrBuilder(recognition_model, config)
100
  builder = DocumentBuilder(config)
101
  document = builder(doc_provider, layout_builder, line_builder, ocr_builder)
 
54
  def ocr_error_model(model_dict):
55
  yield model_dict["ocr_error_model"]
56
 
 
 
 
 
57
  @pytest.fixture(scope="function")
58
  def config(request):
59
  config_mark = request.node.get_closest_marker("config")
 
89
  yield provider_cls(temp_doc.name, config)
90
 
91
  @pytest.fixture(scope="function")
92
+ def pdf_document(request, config, doc_provider, layout_model, ocr_error_model, recognition_model, detection_model):
93
  layout_builder = LayoutBuilder(layout_model, config)
94
+ line_builder = LineBuilder(detection_model, ocr_error_model, config)
95
  ocr_builder = OcrBuilder(recognition_model, config)
96
  builder = DocumentBuilder(config)
97
  document = builder(doc_provider, layout_builder, line_builder, ocr_builder)