Kalaoke committed on
Commit
2c5bd8b
·
1 Parent(s): eb39c91

add custom handler and modify pipeline

Browse files
__pycache__/bert_for_sequence_classification.cpython-37.pyc CHANGED
Binary files a/__pycache__/bert_for_sequence_classification.cpython-37.pyc and b/__pycache__/bert_for_sequence_classification.cpython-37.pyc differ
 
__pycache__/bibert_multitask_classification.cpython-37.pyc CHANGED
Binary files a/__pycache__/bibert_multitask_classification.cpython-37.pyc and b/__pycache__/bibert_multitask_classification.cpython-37.pyc differ
 
__pycache__/handler.cpython-37.pyc ADDED
Binary file (2.31 kB). View file
 
bibert_multitask_classification.py CHANGED
@@ -9,6 +9,7 @@ def softmax(_outputs):
9
 
10
  class BiBert_MultiTaskPipeline(Pipeline):
11
 
 
12
  def _sanitize_parameters(self, **kwargs):
13
 
14
  preprocess_kwargs = {}
@@ -18,7 +19,14 @@ class BiBert_MultiTaskPipeline(Pipeline):
18
  forward_kwargs = {}
19
  if "task_id" in kwargs:
20
  forward_kwargs["task_id"] = kwargs["task_id"]
21
- return preprocess_kwargs, forward_kwargs, {}
 
 
 
 
 
 
 
22
 
23
  def preprocess(self, inputs, task_id):
24
  return_tensors = self.framework
@@ -30,12 +38,19 @@ class BiBert_MultiTaskPipeline(Pipeline):
30
  def _forward(self, model_inputs, task_id):
31
  return self.model(**model_inputs)
32
 
33
- def postprocess(self, model_outputs):
34
  outputs = model_outputs["logits"][0]
35
  outputs = outputs.numpy()
36
  scores = softmax(outputs)
37
 
 
 
 
38
  dict_scores = [
39
- {"label": self.model.config.id2label[i], "score": score.item()} for i, score in enumerate(scores)
40
- ]
 
 
 
 
41
  return dict_scores
 
9
 
10
  class BiBert_MultiTaskPipeline(Pipeline):
11
 
12
+
13
  def _sanitize_parameters(self, **kwargs):
14
 
15
  preprocess_kwargs = {}
 
19
  forward_kwargs = {}
20
  if "task_id" in kwargs:
21
  forward_kwargs["task_id"] = kwargs["task_id"]
22
+
23
+ postprocess_kwargs = {}
24
+ if "top_k" in kwargs:
25
+ postprocess_kwargs["top_k"] = kwargs["top_k"]
26
+ postprocess_kwargs["_legacy"] = False
27
+ return preprocess_kwargs, forward_kwargs, postprocess_kwargs
28
+
29
+
30
 
31
  def preprocess(self, inputs, task_id):
32
  return_tensors = self.framework
 
38
  def _forward(self, model_inputs, task_id):
39
  return self.model(**model_inputs)
40
 
41
def postprocess(self, model_outputs, top_k=1, _legacy=True):
    """Convert raw model logits into label/score dictionaries.

    In legacy mode with top_k == 1, a single {"label", "score"} dict for
    the argmax class is returned. Otherwise a list of per-class dicts is
    built; when _legacy is False it is sorted by descending score and
    truncated to top_k (unless top_k is None).
    """
    logits = model_outputs["logits"][0].numpy()
    probs = softmax(logits)

    # Legacy single-prediction shortcut: return only the best class.
    if _legacy and top_k == 1:
        best_idx = probs.argmax().item()
        return {
            "label": self.model.config.id2label[best_idx],
            "score": probs.max().item(),
        }

    results = [
        {"label": self.model.config.id2label[idx], "score": prob.item()}
        for idx, prob in enumerate(probs)
    ]
    if not _legacy:
        results.sort(key=lambda entry: entry["score"], reverse=True)
        if top_k is not None:
            results = results[:top_k]
    return results
handler.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from dataclasses import dataclass
3
+ import torch
4
+ from transformers import AutoTokenizer
5
+ from transformers import pipeline
6
+ from transformers.pipelines import PIPELINE_REGISTRY
7
+ from bibert_multitask_classification import BiBert_MultiTaskPipeline
8
+ from bert_for_sequence_classification import BertForSequenceClassification
9
+
10
@dataclass
class Task:
    """Description of one classification head of the multi-task model."""

    id: int  # numeric task identifier; matches the pipeline's task_id
    name: str  # human-readable task name
    type: str  # task kind, e.g. 'seq_classification'
    num_labels: int  # size of this head's label space
16
+
17
# Register the custom multi-task pipeline so it can be resolved by name via
# transformers.pipeline("bibert-multitask-classification", ...).
PIPELINE_REGISTRY.register_pipeline(
    "bibert-multitask-classification",
    pipeline_class=BiBert_MultiTaskPipeline,
    pt_model=BertForSequenceClassification
)
22
class EndpointHandler():
    """Serve the multi-task BiBERT model behind an inference endpoint.

    A single shared model is loaded once and wrapped in two pipelines:
    ``classifier_s`` for the 5-label task (task_id "0") and
    ``classifier_p`` for the binary task (task_id "1").
    """

    def __init__(self, path=""):
        """Preload tokenizer, model and both task pipelines.

        path: directory or hub id holding the tokenizer and model weights.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokenizer = AutoTokenizer.from_pretrained(path)

        # Task ids/num_labels must match what the checkpoint was trained with.
        tasks = [
            Task(id=0, name='label_classification', type='seq_classification', num_labels=5),
            Task(id=1, name='binary_classification', type='seq_classification', num_labels=2)
        ]

        model = BertForSequenceClassification.from_pretrained(path, tasks_map=tasks).to(device)

        # Two pipelines sharing the same underlying model, one per task head.
        self.classifier_s = pipeline(
            "bibert-multitask-classification",
            model=model, task_id="0", tokenizer=tokenizer, device=device,
        )
        self.classifier_p = pipeline(
            "bibert-multitask-classification",
            model=model, task_id="1", tokenizer=tokenizer, device=device,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            "text" (:obj:`str` | list of str): the input(s) to classify.
            "lang": accepted but currently unused.
        Return:
            A :obj:`dict` {"label": 2, "score": ...} when the binary
            classifier confidently predicts '0', otherwise the raw
            5-label pipeline output (a list); will be serialized and
            returned.
        """
        inputs = data.pop("text", data)
        # "lang" is popped so it cannot leak downstream, but its value is
        # not used yet — TODO(review): confirm whether it should be.
        data.pop("lang", None)
        if isinstance(inputs, str):
            inputs = [inputs]

        # First pass: cheap binary classifier. A confident '0' verdict
        # short-circuits the 5-label pass.
        prediction_p = self.classifier_p(inputs)
        label = prediction_p[0]['label']
        score = prediction_p[0]['score']

        if label == '0' and score >= 0.75:
            # Sentinel label 2 signals "confident binary negative".
            return {"label": 2, "score": score}

        # Otherwise defer entirely to the 5-label classifier. (The original
        # code computed prediction_s[0]'s label/score here and discarded
        # them — dead reads removed.)
        return self.classifier_s(inputs)