izzelbas
/

roberta-large-ugm-cs-curriculum

Model card Files Files and versions

izzelbas committed on Aug 7, 2025

Commit

5777bbb

·

verified ·

1 Parent(s): 663e929

Update handler.py

Files changed (1) hide show

handler.py +9 -14

handler.py CHANGED Viewed

@@ -3,7 +3,6 @@ from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 class EndpointHandler:
     def __init__(self, path=""):
-        # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         self.model = AutoModelForQuestionAnswering.from_pretrained(path)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -11,10 +10,8 @@ class EndpointHandler:
         self.model.eval()
     def get_top1_answer(self, question, context, max_answer_len=30):
-        # Tokenize input
         inputs = self.tokenizer(question, context, return_tensors="pt", truncation=True, max_length=512).to(self.device)
-        # Inference
         with torch.no_grad():
             outputs = self.model(**inputs)
@@ -34,19 +31,17 @@ class EndpointHandler:
         return best_span, best_score
-    def preprocess(self, inputs):
-        # Expecting {"inputs": {"question": "...", "context": "..."}}
-        payload = inputs.get("inputs", {})
-        question = payload.get("question", "")
-        context = payload.get("context", "")
-        return question, context
-    def predict(self, inputs):
-        question, context = self.preprocess(inputs)
         answer, score = self.get_top1_answer(question, context)
         return {"answer": answer, "score": score}
-    def postprocess(self, outputs):
-        return outputs
 handler = EndpointHandler()

 class EndpointHandler:
     def __init__(self, path=""):
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         self.model = AutoModelForQuestionAnswering.from_pretrained(path)
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.model.eval()
     def get_top1_answer(self, question, context, max_answer_len=30):
         inputs = self.tokenizer(question, context, return_tensors="pt", truncation=True, max_length=512).to(self.device)
         with torch.no_grad():
             outputs = self.model(**inputs)
         return best_span, best_score
+    def __call__(self, data):
+        # Hugging Face sends data with "inputs" key
+        inputs = data.get("inputs", {})
+        question = inputs.get("question")
+        context = inputs.get("context")
+        if not question or not context:
+            return {"error": "Both 'question' and 'context' must be provided."}
         answer, score = self.get_top1_answer(question, context)
         return {"answer": answer, "score": score}
+# Must be callable
 handler = EndpointHandler()