File size: 1,793 Bytes
7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d 7d5296f 45bd82d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from typing import Dict, Any, List
from transformers import AutoTokenizer
from gliner import GLiNER
class EndpointHandler:
    """Custom Inference Endpoints handler for zero-shot NER with GLiNER."""

    def __init__(self, path: str = ""):
        """Load the GLiNER model (and a tokenizer, currently unused by __call__).

        Args:
            path: Local model path provided by the endpoint runtime; falls back
                to the public ``urchade/gliner_multi-v2.1`` checkpoint if empty.
        """
        self.model = GLiNER.from_pretrained(path if path else "urchade/gliner_multi-v2.1")
        # NOTE(review): this tokenizer is never used in __call__; kept so the
        # attribute remains available to any external code that expects it.
        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")
        self.initialized = True

    def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
        """Run entity prediction on one request payload.

        Args:
            data (Dict[str, Any]): Dictionary containing:
                - inputs/text (str): Input text
                - labels (str | list): Comma-separated string or list of labels
                - threshold (float, optional): Confidence threshold (default 0.3)
                - nested_ner (bool, optional): Enable nested NER (default True)

        Returns:
            Dict[str, List[Dict[str, Any]]]: ``{"entities": [...]}`` where each
            entity carries entity/word/start/end/score keys.
        """
        # Get inputs - handle both "inputs" and "text" keys for flexibility
        text = data.pop("inputs", data.get("text", ""))

        # Accept labels as a comma-separated string or as a list; strip
        # whitespace and drop empties so "" no longer becomes [""].
        raw_labels = data.get("labels", "")
        if isinstance(raw_labels, str):
            labels = [label.strip() for label in raw_labels.split(",") if label.strip()]
        else:
            labels = [str(label).strip() for label in raw_labels if str(label).strip()]

        threshold = float(data.get("threshold", 0.3))
        nested_ner = bool(data.get("nested_ner", True))

        # Run prediction; GLiNER's flat_ner flag is the inverse of nested NER.
        entities = self.model.predict_entities(
            text,
            labels,
            flat_ner=not nested_ner,
            threshold=threshold
        )

        # Format output — propagate the real confidence score (was hard-coded
        # to 0, which discarded the model's confidence for every entity).
        return {
            "entities": [
                {
                    "entity": entity["label"],
                    "word": entity["text"],
                    "start": entity["start"],
                    "end": entity["end"],
                    "score": entity.get("score", 0.0)
                }
                for entity in entities
            ]
        }