shubhasz committed on
Commit
be80ddf
·
1 Parent(s): b1b4e70

adding handler.py

Browse files
handler.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ##########################################
2
+ # This file will be copy pasted in the HuggingFace
3
+ # Model repo for doing inference.
4
+ ##########################################
5
+
6
+ from transformers.pipelines import TokenClassificationPipeline, AggregationStrategy
7
+ from typing import Any, Union, List, Optional, Tuple, Dict
8
+ from optimum.onnxruntime import ORTModelForTokenClassification
9
+ from transformers import AutoTokenizer
10
+
11
+
12
class MyTokenClassificationPipeline(TokenClassificationPipeline):
    """Token-classification pipeline whose tokenizer never emits `token_type_ids`.

    Preprocessing is forced to call the tokenizer with
    ``return_token_type_ids=False`` — presumably because the exported ONNX
    model has no ``token_type_ids`` input (TODO: confirm against the model's
    input signature).
    """

    def _sanitize_parameters(
        self,
        ignore_labels=None,
        grouped_entities: Optional[bool] = None,
        ignore_subwords: Optional[bool] = None,
        aggregation_strategy: Optional[AggregationStrategy] = None,
        offset_mapping: Optional[List[Tuple[int, int]]] = None,
        stride: Optional[int] = None,
    ):
        # Let the parent split everything into the three per-stage dicts first,
        # then inject our tokenizer override into the preprocess stage.
        pre_params, forward_params, post_params = super()._sanitize_parameters(
            ignore_labels,
            grouped_entities,
            ignore_subwords,
            aggregation_strategy,
            offset_mapping,
            stride,
        )
        pre_params['tokenizer_params'] = {'return_token_type_ids': False}
        return pre_params, forward_params, post_params
32
+
33
+
34
class EndpointHandler():
    """Inference Endpoint handler: runs ONNX token-classification (NER) over
    text that is first split into multi-sentence chunks, and maps each
    detected entity's offsets back into the original, un-chunked text.
    """

    def __init__(self, path="") -> None:
        """Load the ONNX model and tokenizer from `path` and build the pipeline.

        :param path: directory of the model repo (passed in by the endpoint runtime).
        """
        model = ORTModelForTokenClassification.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path)
        self.pipe = MyTokenClassificationPipeline(
            model=model,
            framework='pt',
            task='ner',
            tokenizer=tokenizer,
            aggregation_strategy='simple',
        )

    def combine_sentences(self, text, context_len=2):
        """Split `text` on '.' and regroup into chunks of `context_len` sentences.

        Always returns a list of chunk strings.

        BUGFIX: the single-sentence edge case used to return the bare string,
        which made the caller iterate over individual characters; it now
        returns a one-element list.
        """
        sentences = text.split(".")
        if len(sentences) == 1:  # no '.' at all — one chunk
            return [text]
        combined = []
        for i in range(0, len(sentences), context_len):
            combined.append(".".join(sentences[i:i + context_len]))
        return combined

    def __call__(self, data: Any) -> List[Dict[str, Any]]:
        """Run NER on `data["inputs"]` (or `data` itself) and return a flat
        list of entity dicts: word, score, class, start, end — with start/end
        expressed as offsets into the original full text.
        """
        inputs = data.pop("inputs", data)

        offset = 0  # start position of the current chunk within the full text
        final_list = []

        for chunk in self.combine_sentences(inputs, context_len=4):
            # Replace commas with spaces — same length, so character offsets
            # inside the chunk remain valid for the original text.
            # BUGFIX: was `self.pipeline(...)`, but __init__ sets `self.pipe`,
            # so every call raised AttributeError.
            res = self.pipe(chunk.replace(",", " "))
            final_list.extend(
                {
                    'word': d['word'],
                    'score': d['score'].item(),
                    'class': 'skill',
                    # (offset + start) + (end - start) simplifies to offset + end
                    'start': offset + d['start'],
                    'end': offset + d['end'],
                }
                for d in res
            )
            # +1 accounts for the '.' separator consumed between chunks.
            offset += len(chunk) + 1
        return final_list
model_optimized_quantized.onnx → model_optimized.onnx RENAMED
File without changes
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ optimum[onnxruntime]
2
+ mkl-include
3
+ mkl