Spaces:
Sleeping
Sleeping
"""Bake the NER-chain models into the Docker image at build time.

Run as a Dockerfile RUN step so the ~1 GB of HF weights + spaCy + skillNer's
nltk/EMSI assets are present in the image layers. The first resume parse in
production is then fast and offline-safe instead of triggering a ~1 GB download
on an ephemeral disk.

Each load is wrapped so one flaky download cannot fail the whole image build —
a skipped model simply falls back to a lazy download at runtime (slower first
parse, but the deploy still succeeds). Model IDs mirror
apps/accounts/ner/{nucha,jobbert,sbert,skillner}.py exactly. No Django/DB needed.
"""
| from __future__ import annotations | |
| def _try(label, fn): | |
| try: | |
| fn() | |
| print(f"prefetch ok: {label}") | |
| except Exception as e: # noqa: BLE001 - intentional: never fail the build | |
| print(f"prefetch SKIPPED {label}: {e}") | |
def main() -> None:
    """Prefetch every model used by the NER chain, one guarded step at a time.

    Each step is handed to ``_try`` as a zero-argument callable. The
    third-party imports live *inside* those callables, not at the top of this
    function: an exception raised while importing ``transformers``,
    ``sentence_transformers``, ``spacy`` or ``skillNer`` is then reported as a
    skipped step instead of aborting the build and skipping the remaining
    steps.

    Steps, in order: the two Hugging Face NER pipelines ("nucha", "jobbert"),
    the sentence-transformers model ("sbert"), and the skillNer extractor
    built on the ``en_core_web_sm`` spaCy model ("skillner").
    """

    def _hf_ner(model_id):
        # Build the loader for one Hugging Face token-classification pipeline;
        # both NER models use the same task and aggregation strategy.
        def _load():
            from transformers import pipeline

            pipeline(
                task="ner",
                model=model_id,
                aggregation_strategy="simple",
            )

        return _load

    def _load_sbert():
        from sentence_transformers import SentenceTransformer

        SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def _load_skillner():
        # NOTE(review): importing skillNer.general_params is what provides
        # SKILL_DB; if that import does I/O it is now covered by the guard too.
        import spacy
        from skillNer.general_params import SKILL_DB
        from skillNer.skill_extractor_class import SkillExtractor
        from spacy.matcher import PhraseMatcher

        SkillExtractor(spacy.load("en_core_web_sm"), SKILL_DB, PhraseMatcher)

    _try("nucha", _hf_ner("Nucha/Nucha_ITSkillNER_BERT"))
    _try("jobbert", _hf_ner("jjzha/jobbert_skill_extraction"))
    _try("sbert", _load_sbert)
    _try("skillner", _load_skillner)

    print("prefetch complete")
# Script entry point: run the prefetch only when executed directly (e.g. from
# a Dockerfile RUN step), not when this module is imported.
if __name__ == "__main__":
    main()