File size: 1,793 Bytes
7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from typing import Dict, Any, List
from transformers import AutoTokenizer
from gliner import GLiNER
class EndpointHandler:
    """Custom Inference Endpoints handler for zero-shot NER with GLiNER."""

    def __init__(self, path: str = ""):
        """Load the GLiNER model (and a tokenizer, currently unused by __call__).

        Args:
            path: Local model path provided by the endpoint runtime; falls back
                to the public ``urchade/gliner_multi-v2.1`` checkpoint if empty.
        """
        self.model = GLiNER.from_pretrained(path if path else "urchade/gliner_multi-v2.1")
        # NOTE(review): this tokenizer is never used in __call__; kept so the
        # attribute remains available to any external code that expects it.
        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")
        self.initialized = True

    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """Run entity prediction on one request payload.

        Args:
            data (Dict[str, Any]): Dictionary containing:
                - inputs/text (str): Input text
                - labels (str | list): Comma-separated string or list of labels
                - threshold (float, optional): Confidence threshold (default 0.3)
                - nested_ner (bool, optional): Enable nested NER (default True)

        Returns:
            Dict[str, List[Dict[str, Any]]]: ``{"entities": [...]}`` where each
            entity carries entity/word/start/end/score keys.
        """
        # Get inputs - handle both "inputs" and "text" keys for flexibility
        text = data.pop("inputs", data.get("text", ""))

        # Accept labels as a comma-separated string or as a list; strip
        # whitespace and drop empties so "" no longer becomes [""].
        raw_labels = data.get("labels", "")
        if isinstance(raw_labels, str):
            labels = [label.strip() for label in raw_labels.split(",") if label.strip()]
        else:
            labels = [str(label).strip() for label in raw_labels if str(label).strip()]

        threshold = float(data.get("threshold", 0.3))
        nested_ner = bool(data.get("nested_ner", True))

        # Run prediction; GLiNER's flat_ner flag is the inverse of nested NER.
        entities = self.model.predict_entities(
            text,
            labels,
            flat_ner=not nested_ner,
            threshold=threshold
        )

        # Format output — propagate the real confidence score (was hard-coded
        # to 0, which discarded the model's confidence for every entity).
        return {
            "entities": [
                {
                    "entity": entity["label"],
                    "word": entity["text"],
                    "start": entity["start"],
                    "end": entity["end"],
                    "score": entity.get("score", 0.0)
                }
                for entity in entities
            ]
        }