mpalinski committed on
Commit
a32bec1
·
verified ·
1 Parent(s): c1816f9

Upload handler.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. handler.py +44 -0
handler.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Custom handler for BERT-OJA-SkillLess on HF Inference Endpoints.
3
+ Processes large input batches efficiently on GPU with internal micro-batching.
4
+ """
5
+ from typing import Dict, List, Any
6
+ import torch
7
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
8
+
9
+
10
class EndpointHandler:
    """Callable handler that scores texts with a sequence-classification model.

    The tokenizer and model are loaded once in ``__init__``. Each call slices
    the incoming texts into micro-batches of ``self.batch_size`` so a large
    request is processed as several bounded forward passes.
    """

    # Token cap passed to the tokenizer; longer texts are truncated.
    MAX_LENGTH = 128
    # Number of texts per forward pass unless ``batch_size`` is overridden.
    DEFAULT_BATCH_SIZE = 512

    def __init__(self, path=""):
        """Load the tokenizer and model from *path*.

        Args:
            path: Location handed to both ``from_pretrained`` calls.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        if self.device == "cuda":
            # On GPU the model is moved to the device and run in half
            # precision; on CPU it is left exactly as loaded.
            self.model = self.model.to(self.device).half()
        self.batch_size = self.DEFAULT_BATCH_SIZE

    def __call__(self, data: Dict[str, Any]) -> List[List[Dict[str, Any]]]:
        """Return per-class probabilities for every input text.

        Args:
            data: Request payload. The texts are read from ``"inputs"``,
                falling back to ``"input"``, then to an empty string. A
                single string is treated as a one-element list.

        Returns:
            One list per input text, in input order. Each inner list holds
            one ``{"label": "LABEL_<k>", "score": <float>}`` dict per logit
            column, with the score rounded to 6 decimals.
        """
        inputs = data.get("inputs", data.get("input", ""))
        if isinstance(inputs, str):
            inputs = [inputs]

        all_results = []
        with torch.no_grad():
            for start in range(0, len(inputs), self.batch_size):
                batch = inputs[start : start + self.batch_size]
                encoded = self.tokenizer(
                    batch,
                    padding=True,
                    truncation=True,
                    max_length=self.MAX_LENGTH,
                    return_tensors="pt",
                )
                encoded = {k: v.to(self.device) for k, v in encoded.items()}
                logits = self.model(**encoded).logits
                # Normalise in float32: with a half-precision model the
                # logits are fp16, and an fp16 softmax cannot carry the
                # 6 decimals that are reported below.
                probs = torch.softmax(logits.float(), dim=-1)
                # A single host copy per micro-batch replaces a separate
                # ``.item()`` read for every individual score.
                all_results.extend(
                    [
                        {"label": f"LABEL_{k}", "score": round(score, 6)}
                        for k, score in enumerate(row)
                    ]
                    for row in probs.cpu().tolist()
                )
        return all_results