"""pdfsys-router — two-stage routing for the pdfsys extraction pipeline. Stage A (cheap): classify text-ok vs needs-ocr from PyMuPDF features, using a ported FinePDFs XGBoost classifier over 124 hand-crafted features. Stage B (uses layout cache): for needs-ocr, read the LayoutDocument written by pdfsys-layout-analyser and decide pipeline vs vlm based on whether complex regions (tables / formulas) exist. Stage B is not in the MVP. """ from __future__ import annotations from .classifier import Router, RouterDecision from .feature_extractor import PDFFeatureExtractor, flatten_per_page_features from .xgb_model import XgbRouterModel, default_weights_path __version__ = "0.0.1" __all__ = [ "__version__", "Router", "RouterDecision", "PDFFeatureExtractor", "flatten_per_page_features", "XgbRouterModel", "default_weights_path", ]