add custom handler and modify pipeline
Browse files
__pycache__/bert_for_sequence_classification.cpython-37.pyc
CHANGED
|
Binary files a/__pycache__/bert_for_sequence_classification.cpython-37.pyc and b/__pycache__/bert_for_sequence_classification.cpython-37.pyc differ
|
|
|
__pycache__/bibert_multitask_classification.cpython-37.pyc
CHANGED
|
Binary files a/__pycache__/bibert_multitask_classification.cpython-37.pyc and b/__pycache__/bibert_multitask_classification.cpython-37.pyc differ
|
|
|
__pycache__/handler.cpython-37.pyc
ADDED
|
Binary file (2.31 kB). View file
|
|
|
bibert_multitask_classification.py
CHANGED
|
@@ -9,6 +9,7 @@ def softmax(_outputs):
|
|
| 9 |
|
| 10 |
class BiBert_MultiTaskPipeline(Pipeline):
|
| 11 |
|
|
|
|
| 12 |
def _sanitize_parameters(self, **kwargs):
|
| 13 |
|
| 14 |
preprocess_kwargs = {}
|
|
@@ -18,7 +19,14 @@ class BiBert_MultiTaskPipeline(Pipeline):
|
|
| 18 |
forward_kwargs = {}
|
| 19 |
if "task_id" in kwargs:
|
| 20 |
forward_kwargs["task_id"] = kwargs["task_id"]
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
def preprocess(self, inputs, task_id):
|
| 24 |
return_tensors = self.framework
|
|
@@ -30,12 +38,19 @@ class BiBert_MultiTaskPipeline(Pipeline):
|
|
| 30 |
def _forward(self, model_inputs, task_id):
|
| 31 |
return self.model(**model_inputs)
|
| 32 |
|
| 33 |
-
def postprocess(self, model_outputs):
|
| 34 |
outputs = model_outputs["logits"][0]
|
| 35 |
outputs = outputs.numpy()
|
| 36 |
scores = softmax(outputs)
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
dict_scores = [
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
return dict_scores
|
|
|
|
| 9 |
|
| 10 |
class BiBert_MultiTaskPipeline(Pipeline):
|
| 11 |
|
| 12 |
+
|
| 13 |
def _sanitize_parameters(self, **kwargs):
|
| 14 |
|
| 15 |
preprocess_kwargs = {}
|
|
|
|
| 19 |
forward_kwargs = {}
|
| 20 |
if "task_id" in kwargs:
|
| 21 |
forward_kwargs["task_id"] = kwargs["task_id"]
|
| 22 |
+
|
| 23 |
+
postprocess_kwargs = {}
|
| 24 |
+
if "top_k" in kwargs:
|
| 25 |
+
postprocess_kwargs["top_k"] = kwargs["top_k"]
|
| 26 |
+
postprocess_kwargs["_legacy"] = False
|
| 27 |
+
return preprocess_kwargs, forward_kwargs, postprocess_kwargs
|
| 28 |
+
|
| 29 |
+
|
| 30 |
|
| 31 |
def preprocess(self, inputs, task_id):
|
| 32 |
return_tensors = self.framework
|
|
|
|
| 38 |
def _forward(self, model_inputs, task_id):
    # Run the underlying multitask model on the tokenized inputs.
    # NOTE(review): task_id is accepted here (routed by _sanitize_parameters'
    # forward_kwargs) but is not passed to the model call — presumably the
    # task head is selected elsewhere (e.g. inside the model or preprocess);
    # verify against BertForSequenceClassification.
    return self.model(**model_inputs)
|
| 40 |
|
| 41 |
+
def postprocess(self, model_outputs, top_k=1, _legacy=True):
    """Convert raw model logits into label/score records.

    In the default legacy mode (``top_k == 1`` and ``_legacy=True``) a single
    ``{"label", "score"}`` dict for the argmax class is returned. Otherwise a
    list with one dict per class is built; when ``_legacy`` is False the list
    is sorted by descending score and truncated to ``top_k`` (no truncation
    when ``top_k`` is None).
    """
    logits = model_outputs["logits"][0].numpy()
    probs = softmax(logits)

    # Legacy single-prediction path: just the best class.
    if _legacy and top_k == 1:
        best = probs.argmax().item()
        return {"label": self.model.config.id2label[best], "score": probs.max().item()}

    results = [
        {"label": self.model.config.id2label[idx], "score": p.item()}
        for idx, p in enumerate(probs)
    ]
    if not _legacy:
        results.sort(key=lambda r: r["score"], reverse=True)
        if top_k is not None:
            results = results[:top_k]
    return results
|
handler.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Any
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
import torch
|
| 4 |
+
from transformers import AutoTokenizer
|
| 5 |
+
from transformers import pipeline
|
| 6 |
+
from transformers.pipelines import PIPELINE_REGISTRY
|
| 7 |
+
from bibert_multitask_classification import BiBert_MultiTaskPipeline
|
| 8 |
+
from bert_for_sequence_classification import BertForSequenceClassification
|
| 9 |
+
|
| 10 |
+
@dataclass
class Task:
    # Lightweight descriptor for one head of the multitask BERT model,
    # passed to BertForSequenceClassification.from_pretrained via tasks_map.
    id: int          # position/index of the task in the model's task map
    name: str        # human-readable task name
    type: str        # task family, e.g. 'seq_classification'
    num_labels: int  # size of this task's classification head
|
| 16 |
+
|
| 17 |
+
# Register the custom task name so that pipeline("bibert-multitask-classification", ...)
# resolves to BiBert_MultiTaskPipeline backed by the project's
# BertForSequenceClassification model class.
PIPELINE_REGISTRY.register_pipeline(
    "bibert-multitask-classification",
    pipeline_class=BiBert_MultiTaskPipeline,
    pt_model=BertForSequenceClassification
)
|
| 22 |
+
class EndpointHandler():
    """Inference endpoint wrapping two task-specific views of one multitask model.

    ``classifier_p`` (task_id "1", binary head) gates each request: a
    confident '0' prediction short-circuits with sentinel label 2; anything
    else falls through to ``classifier_s`` (task_id "0", 5-way label head).
    """

    def __init__(self, path=""):
        # Preload all the elements you are going to need at inference.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokenizer = AutoTokenizer.from_pretrained(path)

        # Task ids/num_labels must match the heads the checkpoint was trained with.
        tasks = [
            Task(id=0, name='label_classification', type='seq_classification', num_labels=5),
            Task(id=1, name='binary_classification', type='seq_classification', num_labels=2)
        ]

        model = BertForSequenceClassification.from_pretrained(path, tasks_map=tasks).to(device)

        # One shared model instance; the two pipelines differ only in task_id.
        self.classifier_s = pipeline("bibert-multitask-classification", model = model, task_id="0", tokenizer=tokenizer, device = device)
        self.classifier_p = pipeline("bibert-multitask-classification", model = model, task_id="1", tokenizer=tokenizer, device = device)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            text (:obj:`str` | :obj:`list`): the text(s) to classify;
                popped from ``data`` (the dict is mutated in place).
            lang (:obj:`str`, optional): currently unused, reserved for routing.
        Return:
            Either a single dict ``{"label": 2, "score": float}`` when the
            binary classifier confidently predicts '0', or the label
            classifier's list of ``{"label", "score"}`` dicts. Will be
            serialized and returned.
        """
        inputs = data.pop("text", data)
        # BUG-FIX: the pop default was `data` itself, which silently bound the
        # whole remaining payload to `lang`. `lang` is not consumed yet, so
        # defaulting to None is behavior-preserving for callers.
        lang = data.pop("lang", None)
        if isinstance(inputs, str):
            inputs = [inputs]

        prediction_p = self.classifier_p(inputs)
        label = prediction_p[0]['label']
        score = prediction_p[0]['score']

        # Confident binary '0' → report sentinel label 2 and skip the more
        # expensive 5-way classification. 0.75 is the gating threshold.
        if label == '0' and score >= 0.75:
            return {"label": 2, "score": score}
        else:
            prediction_s = self.classifier_s(inputs)
            return prediction_s
|