| | import spacy |
| |
|
| | from typing import Any |
| |
|
| |
|
def _load_spacy_model(name: str) -> spacy.Language:
    """Load and memoize a spaCy pipeline, keeping only the NER component.

    Loaded pipelines are cached on the function object, keyed by model
    name, so repeated calls are cheap. Keying by ``name`` fixes the
    original behavior, which cached a single pipeline and silently
    ignored ``name`` on every subsequent call.

    Args:
        name: Installed spaCy model package, e.g. ``"en_core_web_lg"``.

    Returns:
        The cached ``spacy.Language`` pipeline with only NER enabled.
    """
    # Per-name cache stored on the function object (module-level singleton,
    # survives across calls without a global statement).
    cache: dict[str, spacy.Language] = getattr(_load_spacy_model, "cache", {})
    _load_spacy_model.cache = cache

    if name not in cache:
        # This endpoint only needs entity recognition; excluding every
        # other component makes loading and inference faster.
        all_except_ner: list[str] = [
            "tok2vec",
            "tagger",
            "parser",
            "attribute_ruler",
            "lemmatizer",
        ]
        nlp = spacy.load(name=name, exclude=all_except_ner)
        cache[name] = nlp
        print(f"Loaded {nlp.meta.get('name', 'unknown')} model from {nlp.path}")

    return cache[name]
| |
|
| |
|
class EndpointHandler:
    """Inference handler that extracts PERSON entities from input text.

    Follows the Hugging Face Inference Endpoints custom-handler contract:
    ``__call__`` receives a payload dict and returns token-classification
    style entity dicts.
    """

    def __init__(self, path: str = ""):
        # `path` is supplied by the endpoint runtime; the spaCy model is
        # loaded by package name instead, so `path` is only logged here.
        print(f"EndpointHandler(path='{path}')")
        self._nlp: spacy.Language = _load_spacy_model(name="en_core_web_lg")

    def __call__(self, data: dict[str, Any]) -> list[dict[str, Any]]:
        """Run NER over the payload text and return PERSON entities only.

        Args:
            data: Request payload; the text to analyse is read from the
                ``"inputs"`` key. The dict is NOT mutated — the original
                used ``pop``, which destructively modified the caller's
                payload.

        Returns:
            One dict per PERSON entity in the Hugging Face
            token-classification schema (``entity_group`` / ``score`` /
            ``word`` / ``start`` / ``end``), or an empty list when
            ``"inputs"`` is missing or empty.
        """
        inputs: str = data.get("inputs", "")
        if not inputs:
            return []

        doc = self._nlp(inputs)
        # spaCy exposes no per-entity confidence here, so a dummy score of
        # 1.0 (float, matching the pipeline schema) is reported.
        return [
            {
                "entity_group": ent.label_,
                "score": 1.0,
                "word": ent.text,
                "start": ent.start_char,
                "end": ent.end_char,
            }
            for ent in doc.ents
            if ent.label_ == "PERSON"
        ]
| |
|