Kalaoke committed on
Commit
2c5bd8b
·
1 Parent(s): eb39c91

add custom handler and modify pipeline

Browse files
__pycache__/bert_for_sequence_classification.cpython-37.pyc CHANGED
Binary files a/__pycache__/bert_for_sequence_classification.cpython-37.pyc and b/__pycache__/bert_for_sequence_classification.cpython-37.pyc differ
 
__pycache__/bibert_multitask_classification.cpython-37.pyc CHANGED
Binary files a/__pycache__/bibert_multitask_classification.cpython-37.pyc and b/__pycache__/bibert_multitask_classification.cpython-37.pyc differ
 
__pycache__/handler.cpython-37.pyc ADDED
Binary file (2.31 kB). View file
 
bibert_multitask_classification.py CHANGED
@@ -9,6 +9,7 @@ def softmax(_outputs):
9
 
10
  class BiBert_MultiTaskPipeline(Pipeline):
11
 
 
12
  def _sanitize_parameters(self, **kwargs):
13
 
14
  preprocess_kwargs = {}
@@ -18,7 +19,14 @@ class BiBert_MultiTaskPipeline(Pipeline):
18
  forward_kwargs = {}
19
  if "task_id" in kwargs:
20
  forward_kwargs["task_id"] = kwargs["task_id"]
21
- return preprocess_kwargs, forward_kwargs, {}
 
 
 
 
 
 
 
22
 
23
  def preprocess(self, inputs, task_id):
24
  return_tensors = self.framework
@@ -30,12 +38,19 @@ class BiBert_MultiTaskPipeline(Pipeline):
30
  def _forward(self, model_inputs, task_id):
31
  return self.model(**model_inputs)
32
 
33
- def postprocess(self, model_outputs):
34
  outputs = model_outputs["logits"][0]
35
  outputs = outputs.numpy()
36
  scores = softmax(outputs)
37
 
 
 
 
38
  dict_scores = [
39
- {"label": self.model.config.id2label[i], "score": score.item()} for i, score in enumerate(scores)
40
- ]
 
 
 
 
41
  return dict_scores
 
9
 
10
  class BiBert_MultiTaskPipeline(Pipeline):
11
 
12
+
13
  def _sanitize_parameters(self, **kwargs):
14
 
15
  preprocess_kwargs = {}
 
19
  forward_kwargs = {}
20
  if "task_id" in kwargs:
21
  forward_kwargs["task_id"] = kwargs["task_id"]
22
+
23
+ postprocess_kwargs = {}
24
+ if "top_k" in kwargs:
25
+ postprocess_kwargs["top_k"] = kwargs["top_k"]
26
+ postprocess_kwargs["_legacy"] = False
27
+ return preprocess_kwargs, forward_kwargs, postprocess_kwargs
28
+
29
+
30
 
31
  def preprocess(self, inputs, task_id):
32
  return_tensors = self.framework
 
38
  def _forward(self, model_inputs, task_id):
39
  return self.model(**model_inputs)
40
 
41
def postprocess(self, model_outputs, top_k=1, _legacy=True):
    """Convert raw model logits into label/score dictionaries.

    In legacy mode with top_k == 1, a single {"label", "score"} dict for
    the argmax class is returned. Otherwise a list of per-class dicts is
    built; when _legacy is False it is sorted by descending score and
    truncated to top_k (unless top_k is None).
    """
    logits = model_outputs["logits"][0].numpy()
    probs = softmax(logits)

    # Legacy single-prediction shortcut: return only the best class.
    if _legacy and top_k == 1:
        best_idx = probs.argmax().item()
        return {
            "label": self.model.config.id2label[best_idx],
            "score": probs.max().item(),
        }

    results = [
        {"label": self.model.config.id2label[idx], "score": prob.item()}
        for idx, prob in enumerate(probs)
    ]
    if not _legacy:
        results.sort(key=lambda entry: entry["score"], reverse=True)
        if top_k is not None:
            results = results[:top_k]
    return results
handler.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from dataclasses import dataclass
3
+ import torch
4
+ from transformers import AutoTokenizer
5
+ from transformers import pipeline
6
+ from transformers.pipelines import PIPELINE_REGISTRY
7
+ from bibert_multitask_classification import BiBert_MultiTaskPipeline
8
+ from bert_for_sequence_classification import BertForSequenceClassification
9
+
10
@dataclass
class Task:
    """Description of one classification head of the multi-task model."""

    id: int  # numeric task identifier; matches the pipeline's task_id
    name: str  # human-readable task name
    type: str  # task kind, e.g. 'seq_classification'
    num_labels: int  # size of this head's label space
16
+
17
# Register the custom multi-task pipeline so it can be resolved by name via
# transformers.pipeline("bibert-multitask-classification", ...).
PIPELINE_REGISTRY.register_pipeline(
    "bibert-multitask-classification",
    pipeline_class=BiBert_MultiTaskPipeline,
    pt_model=BertForSequenceClassification
)
22
class EndpointHandler():
    """Serve the multi-task BiBERT model behind an inference endpoint.

    A single shared model is loaded once and wrapped in two pipelines:
    ``classifier_s`` for the 5-label task (task_id "0") and
    ``classifier_p`` for the binary task (task_id "1").
    """

    def __init__(self, path=""):
        """Preload tokenizer, model and both task pipelines.

        path: directory or hub id holding the tokenizer and model weights.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        tokenizer = AutoTokenizer.from_pretrained(path)

        # Task ids/num_labels must match what the checkpoint was trained with.
        tasks = [
            Task(id=0, name='label_classification', type='seq_classification', num_labels=5),
            Task(id=1, name='binary_classification', type='seq_classification', num_labels=2)
        ]

        model = BertForSequenceClassification.from_pretrained(path, tasks_map=tasks).to(device)

        # Two pipelines sharing the same underlying model, one per task head.
        self.classifier_s = pipeline(
            "bibert-multitask-classification",
            model=model, task_id="0", tokenizer=tokenizer, device=device,
        )
        self.classifier_p = pipeline(
            "bibert-multitask-classification",
            model=model, task_id="1", tokenizer=tokenizer, device=device,
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        data args:
            "text" (:obj:`str` | list of str): the input(s) to classify.
            "lang": accepted but currently unused.
        Return:
            A :obj:`dict` {"label": 2, "score": ...} when the binary
            classifier confidently predicts '0', otherwise the raw
            5-label pipeline output (a list); will be serialized and
            returned.
        """
        inputs = data.pop("text", data)
        # "lang" is popped so it cannot leak downstream, but its value is
        # not used yet — TODO(review): confirm whether it should be.
        data.pop("lang", None)
        if isinstance(inputs, str):
            inputs = [inputs]

        # First pass: cheap binary classifier. A confident '0' verdict
        # short-circuits the 5-label pass.
        prediction_p = self.classifier_p(inputs)
        label = prediction_p[0]['label']
        score = prediction_p[0]['score']

        if label == '0' and score >= 0.75:
            # Sentinel label 2 signals "confident binary negative".
            return {"label": 2, "score": score}

        # Otherwise defer entirely to the 5-label classifier. (The original
        # code computed prediction_s[0]'s label/score here and discarded
        # them — dead reads removed.)
        return self.classifier_s(inputs)