import spacy
from typing import Any
def _load_spacy_model(name: str) -> spacy.Language:
    """Load a spaCy model with every pipeline component except NER excluded.

    Loaded models are cached per *name* on the function object, so repeated
    calls reuse the same ``spacy.Language`` instance instead of reloading
    from disk. (The previous cache ignored ``name``: a second call with a
    different model name silently returned the first model ever loaded.)

    Args:
        name: spaCy package name to load, e.g. ``"en_core_web_lg"``.

    Returns:
        The cached ``spacy.Language`` pipeline with only the NER component
        left enabled.
    """
    cache: dict[str, spacy.Language] = getattr(_load_spacy_model, "_cache", {})
    _load_spacy_model._cache = cache
    if name not in cache:
        # pipeline info https://spacy.io/models/en#en_core_web_lg
        # This handler only needs entities, so excluding tok2vec/tagger/
        # parser/etc. cuts load time and memory.
        all_except_ner: list[str] = [
            "tok2vec",
            "tagger",
            "parser",
            "attribute_ruler",
            "lemmatizer",
        ]
        nlp = spacy.load(name, exclude=all_except_ner)
        cache[name] = nlp
        # Logged only on a cache miss (first load of this model name).
        print(f"Loaded {nlp.meta.get('name', 'unknown')} model from {nlp.path}")
    return cache[name]
class EndpointHandler:
    """Hugging Face Inference Endpoints handler that returns PERSON entities.

    Runs spaCy NER over the request text and emits results in the
    transformers token-classification output shape
    (``entity_group`` / ``score`` / ``word`` / ``start`` / ``end``).
    """

    def __init__(self, path: str = ""):
        # `path` is supplied by the Inference Endpoints runtime; the model is
        # loaded by package name instead, so the path is only echoed here.
        print(f"EndpointHandler(path='{path}')")
        self._nlp: spacy.Language = _load_spacy_model(name="en_core_web_lg")

    def __call__(self, data: dict[str, Any]) -> list[dict[str, Any]]:
        """Extract PERSON entities from ``data["inputs"]``.

        Note: ``"inputs"`` is popped from ``data``, mutating the caller's
        dict. Returns an empty list when no text is provided.
        """
        text: str = data.pop("inputs", "")
        if not text:
            return []
        doc = self._nlp(text=text)
        # Keep only PERSON spans; score is hard-coded to 1 to fit the
        # expected output schema.
        return [
            {
                "entity_group": span.label_,
                "score": 1,
                "word": span.text,
                "start": span.start_char,
                "end": span.end_char,
            }
            for span in doc.ents
            if span.label_ == "PERSON"
        ]