marker / tests /builders /test_strip_existing_ocr.py
Vik Paruchuri
Fix line merging bug
6951df2
import pytest
@pytest.mark.config({"page_range": [0], "strip_existing_ocr": True})
@pytest.mark.filename("handwritten.pdf")
def test_strip_ocr(doc_provider):
# Ensure that the OCR text isn't extracted
assert len(doc_provider.page_lines) == 0
@pytest.mark.config({"page_range": [0]})
@pytest.mark.filename("handwritten.pdf")
def test_keep_ocr(doc_provider):
assert len(doc_provider.page_lines) == 1