|
|
import sys |
|
|
from contextlib import suppress |
|
|
import click |
|
|
|
|
|
from marker.config.printer import CustomClickPrinter |
|
|
from marker.config.crawler import crawler |
|
|
from marker.config.parser import ConfigParser |
|
|
|
|
|
|
|
|
def capture_kwargs(argv): |
|
|
command = click.command(cls=CustomClickPrinter) |
|
|
captured_kwargs = {} |
|
|
|
|
|
def parse_args(**kwargs): |
|
|
captured_kwargs.update(kwargs) |
|
|
return kwargs |
|
|
|
|
|
original_argv = sys.argv |
|
|
sys.argv = argv |
|
|
try: |
|
|
with suppress(SystemExit): |
|
|
command(ConfigParser.common_options(parse_args))() |
|
|
finally: |
|
|
sys.argv = original_argv |
|
|
|
|
|
return captured_kwargs |
|
|
|
|
|
|
|
|
def test_config_parser(): |
|
|
sys.argv = [ |
|
|
"test", |
|
|
"--disable_multiprocessing", |
|
|
"--output_dir", |
|
|
"output_dir", |
|
|
"--height_tolerance", |
|
|
"0.5", |
|
|
] |
|
|
kwargs = capture_kwargs(sys.argv) |
|
|
parser = ConfigParser(kwargs) |
|
|
config_dict = parser.generate_config_dict() |
|
|
|
|
|
|
|
|
assert kwargs["disable_multiprocessing"] |
|
|
assert kwargs["output_dir"] == "output_dir" |
|
|
|
|
|
assert config_dict["pdftext_workers"] == 1 |
|
|
assert config_dict["height_tolerance"] == 0.5 |
|
|
|
|
|
|
|
|
def test_config_none(): |
|
|
kwargs = capture_kwargs(["test"]) |
|
|
|
|
|
for key in crawler.attr_set: |
|
|
|
|
|
value = None |
|
|
assert kwargs.get(key) is value |
|
|
|
|
|
|
|
|
def test_config_llm(): |
|
|
kwargs = capture_kwargs(["test", "--use_llm"]) |
|
|
parser = ConfigParser(kwargs) |
|
|
config_dict = parser.generate_config_dict() |
|
|
|
|
|
|
|
|
assert config_dict["use_llm"] |
|
|
|
|
|
|
|
|
def test_config_force_ocr(): |
|
|
kwargs = capture_kwargs(["test", "--force_ocr"]) |
|
|
parser = ConfigParser(kwargs) |
|
|
config_dict = parser.generate_config_dict() |
|
|
|
|
|
|
|
|
assert config_dict["force_ocr"] |
|
|
|