"""pdfsys-router — two-stage routing for the pdfsys extraction pipeline.

Stage A (cheap): classify text-ok vs needs-ocr from PyMuPDF features, using
a ported FinePDFs XGBoost classifier over 124 hand-crafted features.

Stage B (uses layout cache): for needs-ocr, read the LayoutDocument written
by pdfsys-layout-analyser and decide pipeline vs vlm based on whether
complex regions (tables / formulas) exist. Stage B is not in the MVP.
"""

from __future__ import annotations

from .classifier import Router, RouterDecision
from .feature_extractor import PDFFeatureExtractor, flatten_per_page_features
from .xgb_model import XgbRouterModel, default_weights_path

# Version string for this package.
__version__ = "0.0.1"

# Public API of the package: the names a star-import of this package exposes.
# Apart from ``__version__`` (defined just above), every entry is re-exported
# from one of the relative imports at the top of this file; entries are
# grouped below by the submodule they come from.
__all__ = [
    "__version__",
    # Re-exported from .classifier
    "Router",
    "RouterDecision",
    # Re-exported from .feature_extractor
    "PDFFeatureExtractor",
    "flatten_per_page_features",
    # Re-exported from .xgb_model
    "XgbRouterModel",
    "default_weights_path",
]