Spaces:
Sleeping
Sleeping
| """ | |
| EvidenceNER inference helper. | |
| Exposes extract_entities() used by the CMA tool `extract_entities` and by | |
| DocumentProcessor after OCR. The EvidenceNER instance is cached at module | |
| level after the first call so the checkpoint is only loaded once per process. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from typing import Optional | |
| from src.ner.model import Entity, EvidenceNER | |
| logger = logging.getLogger(__name__) | |
| _DEFAULT_MODEL_DIR = "models/evidence_ner" | |
| _ner: Optional[EvidenceNER] = None | |
| def init_ner(model_dir: str = _DEFAULT_MODEL_DIR) -> EvidenceNER: | |
| """ | |
| Explicitly initialise (or reload) the module-level EvidenceNER singleton. | |
| Call this once at server startup for a predictable load-time cost. | |
| """ | |
| global _ner | |
| logger.info("Loading EvidenceNER from %s …", model_dir) | |
| _ner = EvidenceNER(model_dir) | |
| return _ner | |
| def extract_entities( | |
| text: str, model_dir: str = _DEFAULT_MODEL_DIR | |
| ) -> list[Entity]: | |
| """ | |
| Extract named entities from *text* and return a list of Entity spans. | |
| Loads the checkpoint from *model_dir* lazily on the first call and caches | |
| the instance for subsequent calls. | |
| Returns [] for empty input; never raises (caller is responsible for | |
| catching EvidenceNER init errors at startup via init_ner()). | |
| """ | |
| global _ner | |
| if _ner is None: | |
| init_ner(model_dir) | |
| return _ner.extract(text) | |