mpalinski
/

BERT-OJA-SkillLess

+"""
+Custom handler for BERT-OJA-SkillLess on HF Inference Endpoints.
+Processes large input batches efficiently on GPU with internal micro-batching.
+"""
+from typing import Dict, List, Any
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
class EndpointHandler:
    """Sequence-classification handler with internal micro-batching.

    Loads the tokenizer and model stored at ``path``; when CUDA is available
    the model is moved to the GPU and converted to half precision. Incoming
    texts are scored in chunks of ``batch_size`` so a large request never has
    to fit into a single forward pass.
    """

    def __init__(self, path: str = "", *, batch_size: int = 512, max_length: int = 128):
        """Load the tokenizer/model and choose the execution device.

        Args:
            path: Directory or identifier passed to ``from_pretrained``.
            batch_size: Number of texts per forward pass (micro-batch size).
            max_length: Token limit at which each text is truncated.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForSequenceClassification.from_pretrained(path)
        self.model.eval()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        if self.device == "cuda":
            self.model = self.model.to(self.device).half()
        self.batch_size = batch_size
        self.max_length = max_length

    def __call__(self, data: Dict[str, Any]) -> List[List[Dict[str, float]]]:
        """Score every input text and return per-class probabilities.

        Args:
            data: Request payload. The texts are read from ``"inputs"``
                (falling back to ``"input"``, then to an empty string). A
                single string is treated as a one-element batch.

        Returns:
            One list per input text, in input order. Each inner list holds one
            ``{"label": "LABEL_<idx>", "score": <probability>}`` dict per
            logit column, with the score rounded to 6 decimals.
        """
        inputs = data.get("inputs", data.get("input", ""))
        if isinstance(inputs, str):
            inputs = [inputs]

        all_results: List[List[Dict[str, float]]] = []
        with torch.no_grad():
            for start in range(0, len(inputs), self.batch_size):
                batch = inputs[start : start + self.batch_size]
                encoded = self.tokenizer(
                    batch,
                    padding=True,
                    truncation=True,
                    max_length=self.max_length,
                    return_tensors="pt",
                )
                encoded = {k: v.to(self.device) for k, v in encoded.items()}
                logits = self.model(**encoded).logits
                # Upcast before softmax: on CUDA the model runs in fp16, whose
                # precision is far below the 6 decimals reported to the caller.
                probs = torch.softmax(logits.float(), dim=-1)
                # One bulk transfer per micro-batch instead of a blocking
                # .item() call for every individual score.
                for row in probs.tolist():
                    all_results.append(
                        [
                            {"label": f"LABEL_{idx}", "score": round(p, 6)}
                            for idx, p in enumerate(row)
                        ]
                    )
        return all_results