izzelbas
/

roberta-large-ugm-cs-curriculum

Safetensors

roberta

Model card Files Files and versions

xet

Community

izzelbas committed on Aug 7, 2025

Commit

608c050

verified ·

1 Parent(s): d8b54ee

Updates handler.py function names to match requirements

Browse files

Files changed (1) hide show

handler.py +27 -48

handler.py CHANGED Viewed

@@ -1,59 +1,38 @@
-import torch
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
-from typing import Dict, Any
-class QuestionAnsweringHandler:  # previous handler, removed by this commit: separate initialize/preprocess/inference/postprocess steps
-    def __init__(self):  # starts empty; initialize() loads the model and tokenizer
-        self.model = None
-        self.tokenizer = None
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # prefer CUDA when available, otherwise CPU
-    def initialize(self, ctx):  # loads model + tokenizer from the "model_dir" entry of ctx.system_properties
-        model_dir = ctx.system_properties.get("model_dir")
-        self.model = AutoModelForQuestionAnswering.from_pretrained(model_dir).to(self.device)
-        self.model.eval()  # inference only: switch off training-time behaviour such as dropout
-        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
-    def preprocess(self, data: Any) -> Dict[str, str]:  # pulls question/context out of the first request only
-        # Expect JSON with {"question": ..., "context": ...}
-        question = data[0]["body"].get("question", "")  # a missing key falls back to ""
-        context = data[0]["body"].get("context", "")
-        return {"question": question, "context": context}
-    def inference(self, inputs: Dict[str, str]) -> Dict[str, str]:  # returns {"best_span": <decoded text of the top-scoring span>}
-        question = inputs["question"]
-        context = inputs["context"]
-        encoded = self.tokenizer(
-            question,
-            context,
-            return_tensors="pt",
-            max_length=512,  # together with truncation=True, caps the encoded pair at 512 tokens
-            truncation=True
-        ).to(self.device)
-        with torch.no_grad():  # no gradients needed for inference
-            outputs = self.model(**encoded)
-        start_logits = outputs.start_logits[0]  # index 0: the single sequence in the batch
-        end_logits = outputs.end_logits[0]
-        max_answer_len = 30  # longest span considered, in tokens
-        input_ids = encoded["input_ids"][0]
-        # Score spans and find the best one
-        best_score = float("-inf")
-        best_span = ""
-        for start in range(len(start_logits)):  # exhaustive search over every start position
-            for end in range(start, min(start + max_answer_len, len(end_logits))):  # end >= start, at most max_answer_len tokens
-                score = start_logits[start] + end_logits[end]  # span score = start logit + end logit
-                if score > best_score:
-                    best_score = score
-                    span_ids = input_ids[start:end + 1]  # end index is inclusive
-                    best_span = self.tokenizer.decode(span_ids, skip_special_tokens=True)  # decoded on every improvement, not just for the final winner
-        return {"best_span": best_span.strip()}
-    def postprocess(self, output: Dict[str, str]) -> [Dict[str, str]]:  # wraps the single result in a one-element list
-        return [output]

 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+import torch
+class EndpointHandler:
+    """Extractive question-answering handler, callable with a request payload.
+
+    The tokenizer and model are loaded once at construction; each call
+    answers one question/context pair.
+    """
+
+    # Encoded question+context pairs longer than this are truncated.
+    MAX_SEQ_LEN = 512
+    # Longest answer span considered, in tokens (the cap used by the
+    # handler this class replaces).
+    MAX_ANSWER_LEN = 30
+
+    def __init__(self, model_path=""):
+        """Load the tokenizer and QA model from *model_path* onto the best available device."""
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.model = AutoModelForQuestionAnswering.from_pretrained(model_path).to(self.device)
+        # Inference only: make sure training-time behaviour (dropout) is off.
+        self.model.eval()
+
+    def __call__(self, data):
+        """Answer a question from a context passage.
+
+        Args:
+            data: dict whose 'inputs' entry is a dict with non-empty
+                'question' and 'context' values.
+
+        Returns:
+            {"answer": text} on success (text may be empty when the best
+            span decodes to nothing), or {"error": message} when the
+            question or context is missing.
+        """
+        inputs = data.get("inputs", {})
+        # A non-dict payload (e.g. a bare string) has no .get(); report it
+        # like any other missing field instead of raising AttributeError.
+        if not isinstance(inputs, dict):
+            return {"error": "Missing question or context"}
+        question = inputs.get("question")
+        context = inputs.get("context")
+        if not question or not context:
+            return {"error": "Missing question or context"}
+        encoded = self.tokenizer(
+            question,
+            context,
+            return_tensors="pt",
+            truncation=True,
+            max_length=self.MAX_SEQ_LEN,
+        ).to(self.device)
+        with torch.no_grad():
+            output = self.model(**encoded)
+        # Index 0: the batch holds exactly one sequence.
+        start_idx, end_idx = self._best_span(
+            output.start_logits[0], output.end_logits[0], self.MAX_ANSWER_LEN
+        )
+        answer = self.tokenizer.decode(
+            encoded["input_ids"][0][start_idx:end_idx + 1], skip_special_tokens=True
+        )
+        # Decoding a mid-sentence span can leave surrounding whitespace.
+        return {"answer": answer.strip()}
+
+    @staticmethod
+    def _best_span(start_scores, end_scores, max_answer_len):
+        """Return (start, end) maximising start_scores[start] + end_scores[end].
+
+        Only spans with start <= end < start + max_answer_len compete.
+        Taking the argmax of each score vector independently can yield
+        end < start, which would discard every valid span; scoring the
+        pairs jointly always produces a well-formed span.
+        """
+        seq_len = start_scores.size(0)
+        positions = torch.arange(seq_len, device=start_scores.device)
+        # offset[s, e] = e - s; legal spans have 0 <= offset < max_answer_len.
+        offset = positions.unsqueeze(0) - positions.unsqueeze(1)
+        legal = (offset >= 0) & (offset < max_answer_len)
+        span_scores = start_scores.unsqueeze(1) + end_scores.unsqueeze(0)
+        span_scores = span_scores.masked_fill(~legal, float("-inf"))
+        # argmax without dim indexes the flattened (row-major) matrix.
+        flat_best = int(torch.argmax(span_scores))
+        return divmod(flat_best, seq_len)