Commit
·
870d666
1
Parent(s):
010ba5d
Remove inline model stuff from tests
Browse files
tests/builders/test_blank_page.py
CHANGED
|
@@ -5,9 +5,9 @@ from marker.builders.layout import LayoutBuilder
|
|
| 5 |
from marker.builders.line import LineBuilder
|
| 6 |
|
| 7 |
|
| 8 |
-
def test_blank_page(config, doc_provider, layout_model, ocr_error_model,
|
| 9 |
layout_builder = LayoutBuilder(layout_model, config)
|
| 10 |
-
line_builder = LineBuilder(detection_model,
|
| 11 |
builder = DocumentBuilder(config)
|
| 12 |
document = builder.build_document(doc_provider)
|
| 13 |
|
|
|
|
| 5 |
from marker.builders.line import LineBuilder
|
| 6 |
|
| 7 |
|
| 8 |
+
def test_blank_page(config, doc_provider, layout_model, ocr_error_model, detection_model):
|
| 9 |
layout_builder = LayoutBuilder(layout_model, config)
|
| 10 |
+
line_builder = LineBuilder(detection_model, ocr_error_model)
|
| 11 |
builder = DocumentBuilder(config)
|
| 12 |
document = builder.build_document(doc_provider)
|
| 13 |
|
tests/builders/test_garbled_pdf.py
CHANGED
|
@@ -30,8 +30,8 @@ def test_garbled_pdf(pdf_document, detection_model, recognition_model, table_rec
|
|
| 30 |
|
| 31 |
@pytest.mark.filename("hindi_judgement.pdf")
|
| 32 |
@pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
|
| 33 |
-
def test_garbled_builder(config, doc_provider, detection_model,
|
| 34 |
-
line_builder = LineBuilder(detection_model,
|
| 35 |
builder = DocumentBuilder(config)
|
| 36 |
document = builder.build_document(doc_provider)
|
| 37 |
|
|
@@ -42,8 +42,8 @@ def test_garbled_builder(config, doc_provider, detection_model, inline_detection
|
|
| 42 |
|
| 43 |
@pytest.mark.filename("adversarial.pdf")
|
| 44 |
@pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
|
| 45 |
-
def test_nongarbled_builder(config, doc_provider, detection_model,
|
| 46 |
-
line_builder = LineBuilder(detection_model,
|
| 47 |
builder = DocumentBuilder(config)
|
| 48 |
document = builder.build_document(doc_provider)
|
| 49 |
|
|
|
|
| 30 |
|
| 31 |
@pytest.mark.filename("hindi_judgement.pdf")
|
| 32 |
@pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
|
| 33 |
+
def test_garbled_builder(config, doc_provider, detection_model, ocr_error_model):
|
| 34 |
+
line_builder = LineBuilder(detection_model, ocr_error_model, config)
|
| 35 |
builder = DocumentBuilder(config)
|
| 36 |
document = builder.build_document(doc_provider)
|
| 37 |
|
|
|
|
| 42 |
|
| 43 |
@pytest.mark.filename("adversarial.pdf")
|
| 44 |
@pytest.mark.config({"page_range": [2, 3], "disable_ocr": True})
|
| 45 |
+
def test_nongarbled_builder(config, doc_provider, detection_model, ocr_error_model):
|
| 46 |
+
line_builder = LineBuilder(detection_model, ocr_error_model, config)
|
| 47 |
builder = DocumentBuilder(config)
|
| 48 |
document = builder.build_document(doc_provider)
|
| 49 |
|
tests/builders/test_inline_math_lines.py
CHANGED
|
@@ -3,6 +3,7 @@ import pytest
|
|
| 3 |
from marker.processors.line_merge import LineMergeProcessor
|
| 4 |
from marker.schema import BlockTypes
|
| 5 |
|
|
|
|
| 6 |
@pytest.mark.config({"page_range": [1]})
|
| 7 |
def test_inline_box_nomerging(pdf_document, config):
|
| 8 |
first_page = pdf_document.pages[0]
|
|
@@ -17,6 +18,7 @@ def test_inline_box_nomerging(pdf_document, config):
|
|
| 17 |
assert line_count == 46
|
| 18 |
|
| 19 |
|
|
|
|
| 20 |
@pytest.mark.config({"page_range": [1], "use_llm": True})
|
| 21 |
def test_inline_box_merging(pdf_document, config):
|
| 22 |
first_page = pdf_document.pages[0]
|
|
|
|
| 3 |
from marker.processors.line_merge import LineMergeProcessor
|
| 4 |
from marker.schema import BlockTypes
|
| 5 |
|
| 6 |
+
@pytest.mark.skip("We do not support this functionality anymore")
|
| 7 |
@pytest.mark.config({"page_range": [1]})
|
| 8 |
def test_inline_box_nomerging(pdf_document, config):
|
| 9 |
first_page = pdf_document.pages[0]
|
|
|
|
| 18 |
assert line_count == 46
|
| 19 |
|
| 20 |
|
| 21 |
+
@pytest.mark.skip("We do not support this functionality anymore")
|
| 22 |
@pytest.mark.config({"page_range": [1], "use_llm": True})
|
| 23 |
def test_inline_box_merging(pdf_document, config):
|
| 24 |
first_page = pdf_document.pages[0]
|
tests/builders/test_layout_replace.py
CHANGED
|
@@ -9,10 +9,10 @@ from marker.schema.registry import get_block_class
|
|
| 9 |
|
| 10 |
|
| 11 |
@pytest.mark.config({"page_range": [0]})
|
| 12 |
-
def test_layout_replace(request, config, doc_provider, layout_model, ocr_error_model, detection_model
|
| 13 |
# The llm layout builder replaces blocks - this makes sure text is still merged properly
|
| 14 |
layout_builder = LayoutBuilder(layout_model, config)
|
| 15 |
-
line_builder = LineBuilder(detection_model,
|
| 16 |
builder = DocumentBuilder(config)
|
| 17 |
document = builder.build_document(doc_provider)
|
| 18 |
layout_builder(document, doc_provider)
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
@pytest.mark.config({"page_range": [0]})
|
| 12 |
+
def test_layout_replace(request, config, doc_provider, layout_model, ocr_error_model, detection_model):
|
| 13 |
# The llm layout builder replaces blocks - this makes sure text is still merged properly
|
| 14 |
layout_builder = LayoutBuilder(layout_model, config)
|
| 15 |
+
line_builder = LineBuilder(detection_model, ocr_error_model, config)
|
| 16 |
builder = DocumentBuilder(config)
|
| 17 |
document = builder.build_document(doc_provider)
|
| 18 |
layout_builder(document, doc_provider)
|
tests/conftest.py
CHANGED
|
@@ -54,10 +54,6 @@ def table_rec_model(model_dict):
|
|
| 54 |
def ocr_error_model(model_dict):
|
| 55 |
yield model_dict["ocr_error_model"]
|
| 56 |
|
| 57 |
-
@pytest.fixture(scope="session")
|
| 58 |
-
def inline_detection_model(model_dict):
|
| 59 |
-
yield model_dict["inline_detection_model"]
|
| 60 |
-
|
| 61 |
@pytest.fixture(scope="function")
|
| 62 |
def config(request):
|
| 63 |
config_mark = request.node.get_closest_marker("config")
|
|
@@ -93,9 +89,9 @@ def doc_provider(request, config, temp_doc):
|
|
| 93 |
yield provider_cls(temp_doc.name, config)
|
| 94 |
|
| 95 |
@pytest.fixture(scope="function")
|
| 96 |
-
def pdf_document(request, config, doc_provider, layout_model, ocr_error_model, recognition_model, detection_model
|
| 97 |
layout_builder = LayoutBuilder(layout_model, config)
|
| 98 |
-
line_builder = LineBuilder(detection_model,
|
| 99 |
ocr_builder = OcrBuilder(recognition_model, config)
|
| 100 |
builder = DocumentBuilder(config)
|
| 101 |
document = builder(doc_provider, layout_builder, line_builder, ocr_builder)
|
|
|
|
| 54 |
def ocr_error_model(model_dict):
|
| 55 |
yield model_dict["ocr_error_model"]
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
@pytest.fixture(scope="function")
|
| 58 |
def config(request):
|
| 59 |
config_mark = request.node.get_closest_marker("config")
|
|
|
|
| 89 |
yield provider_cls(temp_doc.name, config)
|
| 90 |
|
| 91 |
@pytest.fixture(scope="function")
|
| 92 |
+
def pdf_document(request, config, doc_provider, layout_model, ocr_error_model, recognition_model, detection_model):
|
| 93 |
layout_builder = LayoutBuilder(layout_model, config)
|
| 94 |
+
line_builder = LineBuilder(detection_model, ocr_error_model, config)
|
| 95 |
ocr_builder = OcrBuilder(recognition_model, config)
|
| 96 |
builder = DocumentBuilder(config)
|
| 97 |
document = builder(doc_provider, layout_builder, line_builder, ocr_builder)
|