Token Classification
GLiNER
PyTorch
multilingual
bert
File size: 1,731 Bytes
4e97a81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from typing import Dict, List, Any
from transformers import pipeline, AutoConfig, AutoModelForTokenClassification, AutoTokenizer, BertTokenizerFast
import os


class EndpointHandler():
    """Hugging Face inference-endpoint handler wrapping a token-classification
    (NER) pipeline built from a GLiNER-style multilingual checkpoint with a
    custom configuration file.
    """

    def __init__(self, path=""):
        """Load config, model and tokenizer, then build the pipeline.

        Args:
            path (str): Directory of the deployed repository; must contain
                ``gliner_config.json``.

        Raises:
            FileNotFoundError: If the custom configuration file is missing.
        """
        dir_model = "urchade/gliner_multi-v2.1"

        config_path = os.path.join(path, "gliner_config.json")
        if not os.path.exists(config_path):
            raise FileNotFoundError(f"Custom configuration file not found at {config_path}")

        # Load the custom configuration
        config = AutoConfig.from_pretrained(config_path)

        # Load the model using the custom configuration
        self.model = AutoModelForTokenClassification.from_pretrained(dir_model, config=config)

        # NOTE(review): assumes bert-base-uncased tokenization is compatible with
        # this multilingual checkpoint — confirm against the model's own tokenizer.
        self.tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

        # BUG FIX: the original passed model=path here, which reloaded a second
        # model from disk and silently discarded self.model (and the custom
        # config applied to it). Reuse the already-loaded model instance.
        self.pipeline = pipeline("token-classification", model=self.model, tokenizer=self.tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run NER over the request payload.

        Args:
            data (Dict[str, Any]): The input data including:
                - "inputs": The text input from which to extract information.

        Returns:
            List[Dict[str, Any]]: The extracted information from the text.
        """
        # A missing "inputs" key falls back to the empty string rather than raising.
        inputs = data.get("inputs", "")

        # Run the pipeline for token classification
        extraction_results = self.pipeline(inputs)

        return extraction_results