| import pytest | |
| from marker.processors.ignoretext import IgnoreTextProcessor | |
| from marker.schema import BlockTypes | |
def test_ignoretext_processor(pdf_document):
    """Run IgnoreTextProcessor over the document and inspect pages[1].

    After processing, the first ``BlockTypes.Text`` block returned by
    ``contained_blocks`` for ``pdf_document.pages[1]`` must contain the
    string "bioRxiv" and have ``ignore_for_output`` set to ``True``.
    """
    ignore_text = IgnoreTextProcessor()
    ignore_text(pdf_document)

    # pages[1] is the entry at index 1 of the document's page list.
    target_page = pdf_document.pages[1]
    text_blocks = target_page.contained_blocks(pdf_document, [BlockTypes.Text])
    # Presumably the running page header, going by the original variable
    # name (page1_header) -- confirm against the fixture PDF.
    first_text_block = text_blocks[0]

    assert "bioRxiv" in first_text_block.raw_text(pdf_document)
    assert first_text_block.ignore_for_output is True