|
|
from typing import Dict, Any, List |
|
|
from transformers import AutoTokenizer |
|
|
from gliner import GLiNER |
|
|
|
|
|
|
|
|
class EndpointHandler:
    """Callable request handler that runs GLiNER entity prediction on a payload."""

    def __init__(self, path: str = ""):
        """Load the GLiNER model and a tokenizer.

        Args:
            path: Location passed to ``GLiNER.from_pretrained``. When empty,
                the ``urchade/gliner_multi-v2.1`` checkpoint is loaded instead.
        """
        self.model = GLiNER.from_pretrained(path if path else "urchade/gliner_multi-v2.1")
        # NOTE(review): this tokenizer is not used anywhere in this class;
        # it is kept so that any external code reading the attribute keeps
        # working. Confirm whether it can be dropped to save startup time.
        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")
        self.initialized = True

    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """Predict entities for one request payload.

        Args:
            data: Request dictionary containing:
                - inputs / text (str): Input text; ``inputs`` wins when both
                  keys are present.
                - labels (str or list of str): Comma-separated label names,
                  or an already-split sequence of label names.
                - threshold (float, optional): Confidence threshold,
                  defaults to 0.3.
                - nested_ner (bool, optional): Enable nested NER, defaults
                  to True. String values such as ``"false"`` are understood.

        Returns:
            ``{"entities": [...]}`` where each item has the keys ``entity``,
            ``word``, ``start``, ``end`` and ``score``. The list is empty
            when there is no text or no usable label.
        """
        # Read without popping so the caller's payload is left untouched.
        text = data.get("inputs", data.get("text", ""))

        raw_labels = data.get("labels", "")
        if isinstance(raw_labels, str):
            raw_labels = raw_labels.split(",")
        # Strip whitespace around each label and drop empty ones, so that
        # "person, location" does not produce the label " location".
        labels = [str(label).strip() for label in raw_labels or []]
        labels = [label for label in labels if label]

        threshold = float(data.get("threshold", 0.3))

        nested_raw = data.get("nested_ner", True)
        if isinstance(nested_raw, str):
            # bool("false") is True, so textual flags are parsed explicitly.
            nested_ner = nested_raw.strip().lower() not in {"", "0", "false", "no", "off"}
        else:
            nested_ner = bool(nested_raw)

        # Nothing to predict: skip the model call entirely.
        if not text or not labels:
            return {"entities": []}

        entities = self.model.predict_entities(
            text,
            labels,
            flat_ner=not nested_ner,
            threshold=threshold,
        )

        return {
            "entities": [
                {
                    "entity": entity["label"],
                    "word": entity["text"],
                    "start": entity["start"],
                    "end": entity["end"],
                    # Report the model's confidence instead of a constant 0;
                    # float() keeps the value JSON-serializable.
                    "score": float(entity["score"]),
                }
                for entity in entities
            ]
        }