# guide/src/ner/predict.py
# Phase 2 — EvidenceNER implementation (commit 4c85df9, sangram kumar yerra)
"""
EvidenceNER inference helper.
Exposes extract_entities() used by the CMA tool `extract_entities` and by
DocumentProcessor after OCR. The EvidenceNER instance is cached at module
level after the first call so the checkpoint is only loaded once per process.
"""
from __future__ import annotations
import logging
from typing import Optional
from src.ner.model import Entity, EvidenceNER
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Checkpoint directory used when a caller does not pass model_dir.
_DEFAULT_MODEL_DIR = "models/evidence_ner"
# Process-wide EvidenceNER cache; stays None until init_ner() has run.
_ner: Optional[EvidenceNER] = None
def init_ner(model_dir: str = _DEFAULT_MODEL_DIR) -> EvidenceNER:
    """
    Build an EvidenceNER from *model_dir* and install it as the module-level
    singleton, replacing any instance that was cached before.

    Intended to be called once at server startup so the checkpoint load
    happens at a predictable time rather than on the first request.
    Returns the freshly constructed instance.
    """
    global _ner
    logger.info("Loading EvidenceNER from %s …", model_dir)
    # Construct first, publish second: if the constructor raises, the
    # previously cached instance (if any) is left untouched.
    loaded = EvidenceNER(model_dir)
    _ner = loaded
    return loaded
def extract_entities(
    text: str, model_dir: str = _DEFAULT_MODEL_DIR
) -> list[Entity]:
    """
    Extract named entities from *text* and return a list of Entity spans.

    The checkpoint is loaded from *model_dir* lazily on the first non-empty
    call and the instance is cached at module level for subsequent calls.
    Once an instance is cached, *model_dir* is ignored; call init_ner() to
    load a different checkpoint.

    Returns [] for empty input without loading or invoking the model.
    Any exception raised while loading the checkpoint on the lazy path
    propagates to the caller; call init_ner() at startup to surface such
    errors early.
    """
    # Enforce the documented empty-input contract here instead of relying
    # on EvidenceNER.extract() to do so; this also avoids a checkpoint load
    # when there is nothing to process.
    if not text:
        return []

    ner = _ner
    if ner is None:
        # init_ner() stores the new instance in the module-level cache and
        # returns it, so no `global` statement is needed in this function.
        ner = init_ner(model_dir)
    return ner.extract(text)