"""Bake the NER-chain models into the Docker image at build time.

Run as a Dockerfile RUN step so the ~1 GB of HF weights + spaCy + skillNer's
nltk/EMSI assets are present in the image layers. The first resume parse in
production is then fast and offline-safe instead of triggering a ~1 GB download
on an ephemeral disk.

Each load is wrapped so one flaky download cannot fail the whole image build —
a skipped model simply falls back to a lazy download at runtime (slower first
parse, but the deploy still succeeds). Model IDs mirror
apps/accounts/ner/{nucha,jobbert,sbert,skillner}.py exactly. No Django/DB needed.
"""
from __future__ import annotations


def _try(label: str, fn) -> None:
    """Run one prefetch step and report its outcome without ever raising.

    Args:
        label: Short name of the model/asset; used only in the log lines.
        fn: Zero-argument callable that performs the load. Its return value
            is discarded.

    Any ``Exception`` raised by ``fn`` is swallowed and reported as a
    ``prefetch SKIPPED`` line, so a failed step cannot abort the caller.
    ``BaseException`` subclasses (e.g. ``KeyboardInterrupt``) still propagate.
    """
    try:
        fn()
    except Exception as exc:  # noqa: BLE001 - intentional: never fail the build
        # Lead with the exception type: str(exc) alone can be empty or
        # ambiguous (KeyError prints just the key), and this log line is the
        # only record of why the step was skipped.
        reason = type(exc).__name__
        if str(exc):
            reason = f"{reason}: {exc}"
        # Flush so the status line is written as soon as the step finishes
        # instead of sitting in the stdout buffer until the process exits.
        print(f"prefetch SKIPPED {label}: {reason}", flush=True)
    else:
        print(f"prefetch ok: {label}", flush=True)


def main() -> None:
    """Warm every NER-chain model cache, one guarded step per model.

    Each step, including the third-party import it depends on, runs inside
    ``_try``. A failure in one step (a missing package, an import-time error,
    or a failed download) is reported and skipped while the remaining steps
    still execute.
    """

    def _load_token_classifier(model_id):
        # Imported here rather than at the top of main(): an ImportError or an
        # import-time failure must skip only this step, not abort the run.
        from transformers import pipeline

        pipeline(task="ner", model=model_id, aggregation_strategy="simple")

    def _load_sbert():
        from sentence_transformers import SentenceTransformer

        SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def _load_skillner():
        # NOTE(review): importing skillNer modules may itself load or fetch
        # assets (SKILL_DB is materialised at import) - confirm. Keeping the
        # imports inside the guarded callable covers that case either way.
        import spacy
        from skillNer.general_params import SKILL_DB
        from skillNer.skill_extractor_class import SkillExtractor
        from spacy.matcher import PhraseMatcher

        SkillExtractor(spacy.load("en_core_web_sm"), SKILL_DB, PhraseMatcher)

    _try("nucha", lambda: _load_token_classifier("Nucha/Nucha_ITSkillNER_BERT"))
    _try("jobbert", lambda: _load_token_classifier("jjzha/jobbert_skill_extraction"))
    _try("sbert", _load_sbert)
    _try("skillner", _load_skillner)

    print("prefetch complete")


# Script entry point: run the prefetch only when this file is executed
# directly (e.g. from a Dockerfile RUN step), not when it is imported.
if __name__ == "__main__":
    main()