from typing import Dict, Any, List

from transformers import AutoTokenizer
from gliner import GLiNER


class EndpointHandler:
    """Inference-endpoint handler wrapping a GLiNER zero-shot NER model."""

    def __init__(self, path: str = ""):
        """Load the model and tokenizer.

        Args:
            path: Model directory or hub id; falls back to the public
                ``urchade/gliner_multi-v2.1`` checkpoint when empty.
        """
        self.model = GLiNER.from_pretrained(path if path else "urchade/gliner_multi-v2.1")
        # NOTE(review): the tokenizer is loaded but never used by __call__;
        # kept to preserve the existing attribute surface for any callers.
        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")
        self.initialized = True

    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """Run entity prediction on a request payload.

        Args:
            data: Dictionary containing:
                - "inputs" or "text" (str): input text
                - "labels" (str): comma-separated entity labels
                - "threshold" (float, optional): confidence threshold (default 0.3)
                - "nested_ner" (bool, optional): allow nested entities (default True)

        Returns:
            ``{"entities": [{"entity", "word", "start", "end", "score"}, ...]}``
        """
        # Accept both "inputs" (HF inference convention) and "text" for flexibility.
        text = data.pop("inputs", data.get("text", ""))
        # Strip whitespace around each label and drop empties, so that
        # "person, location," produces ["person", "location"] rather than
        # labels with leading spaces or blank strings.
        labels = [label.strip() for label in data.get("labels", "").split(",") if label.strip()]
        threshold = float(data.get("threshold", 0.3))
        nested_ner = bool(data.get("nested_ner", True))

        entities = self.model.predict_entities(
            text, labels, flat_ner=not nested_ner, threshold=threshold
        )

        return {
            "entities": [
                {
                    "entity": entity["label"],
                    "word": entity["text"],
                    "start": entity["start"],
                    "end": entity["end"],
                    # Bug fix: propagate the model's confidence instead of a
                    # hard-coded 0 (fall back to 0.0 if "score" is absent).
                    "score": entity.get("score", 0.0),
                }
                for entity in entities
            ]
        }