Token Classification
GLiNER
PyTorch
multilingual
bert
Rejebc committed on
Commit
1903c5a
·
verified ·
1 Parent(s): d2dffba

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +23 -28
handler.py CHANGED
@@ -1,42 +1,37 @@
1
  from typing import Dict, List, Any
2
- from transformers import pipeline, AutoConfig, AutoModelForTokenClassification, AutoTokenizer, BertTokenizerFast
3
  import os
4
 
5
-
6
- class EndpointHandler():
7
  def __init__(self, path=""):
8
- dir_model = "urchade/gliner_multi-v2.1"
9
-
10
- config_path = os.path.join(path, "gliner_config.json")
11
- if not os.path.exists(config_path):
12
- raise FileNotFoundError(f"Custom configuration file not found at {config_path}")
13
-
14
- # Load the custom configuration
15
- config = AutoConfig.from_pretrained(config_path)
16
-
17
- # Load the model using the custom configuration
18
- self.model = AutoModelForTokenClassification.from_pretrained(dir_model, config=config)
19
-
20
- # Initialize the pipeline with the model and tokenizer
21
- # Use a pre-trained tokenizer compatible with your model
22
- self.tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
23
- # Use a pipeline appropriate for your task. Here we use "token-classification" for NER (Named Entity Recognition).
24
- self.pipeline = pipeline("token-classification", model=path, tokenizer=self.tokenizer)
25
 
26
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
27
  """
28
  Args:
29
  data (Dict[str, Any]): The input data including:
30
  - "inputs": The text input from which to extract information.
 
31
 
32
  Returns:
33
- List[Dict[str, Any]]: The extracted information from the text.
34
  """
35
- # Get inputs
36
  inputs = data.get("inputs", "")
37
-
38
- # Run the pipeline for text extraction
39
- extraction_results = self.pipeline(inputs)
40
-
41
- # Process and return the results as needed
42
- return extraction_results
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Dict, List, Any
2
+ from gliner import GLiNER
3
  import os
4
 
5
+ class EndpointHandler:
 
6
  def __init__(self, path=""):
7
+ # Initialize the GLiNER model
8
+ self.model = GLiNER.from_pretrained("urchade/gliner_multi-v2.1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
11
  """
12
  Args:
13
  data (Dict[str, Any]): The input data including:
14
  - "inputs": The text input from which to extract information.
15
+ - "labels": The labels to predict entities for.
16
 
17
  Returns:
18
+ List[Dict[str, Any]]: The extracted entities from the text, formatted as required.
19
  """
20
+ # Get inputs and labels
21
  inputs = data.get("inputs", "")
22
+ labels = data.get("labels", [])
23
+
24
+ # Predict entities using GLiNER
25
+ entities = self.model.predict_entities(inputs, labels)
26
+
27
+ # Format the results to match the expected output structure
28
+ formatted_results = []
29
+ for entity in entities:
30
+ formatted_entity = {
31
+ "word": entity["text"],
32
+ "entity_group": entity["label"], # Assuming entity["label"] contains the label
33
+ "score": entity.get("score", 1.0) # Assuming a default score of 1.0 if not provided
34
+ }
35
+ formatted_results.append(formatted_entity)
36
+
37
+ return formatted_results